diff --git a/others/powerjob-mysql.sql b/others/powerjob-mysql.sql index 2e15d519..c69ef4a5 100644 --- a/others/powerjob-mysql.sql +++ b/others/powerjob-mysql.sql @@ -69,11 +69,9 @@ CREATE TABLE `instance_info` `gmt_create` datetime not NULL COMMENT '创建时间', `gmt_modified` datetime not NULL COMMENT '更新时间', PRIMARY KEY (`id`), - KEY `idx01_instance_info` (`job_id`), - KEY `idx02_instance_info` (`app_id`), - KEY `idx03_instance_info` (`instance_id`), - KEY `idx04_instance_info` (`wf_instance_id`), - KEY `idx05_instance_info` (`expected_trigger_time`) + KEY `idx01_instance_info` (`job_id`, `status`), + KEY `idx02_instance_info` (`app_id`, `status`), + KEY `idx03_instance_info` (`instance_id`, `status`) ) ENGINE = InnoDB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8mb4 @@ -110,15 +108,13 @@ CREATE TABLE `job_info` `task_retry_num` int not NULL default 0 COMMENT 'Task重试次数', `time_expression` varchar(255) default NULL COMMENT '时间表达式,内容取决于time_expression_type,1:CRON/2:NULL/3:LONG/4:LONG', `time_expression_type` int not NULL COMMENT '时间表达式类型,1:CRON/2:API/3:FIX_RATE/4:FIX_DELAY,5:WORKFLOW\n)', - `tag` varchar(255) DEFAULT NULL COMMENT 'TAG', - `log_config` varchar(255) DEFAULT NULL COMMENT '日志配置', + `tag` varchar(255) DEFAULT NULL COMMENT 'TAG', + `log_config` varchar(255) DEFAULT NULL COMMENT '日志配置', `extra` varchar(255) DEFAULT NULL COMMENT '扩展字段', `gmt_create` datetime not NULL COMMENT '创建时间', `gmt_modified` datetime not NULL COMMENT '更新时间', PRIMARY KEY (`id`), - KEY `idx01_job_info` (`app_id`), - KEY `idx02_job_info` (`job_name`), - KEY `idx03_job_info` (`next_trigger_time`) + KEY `idx01_job_info` (`app_id`, `status`, `time_expression_type`, `next_trigger_time`) ) ENGINE = InnoDB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8mb4 @@ -154,7 +150,8 @@ CREATE TABLE `server_info` `gmt_modified` datetime DEFAULT NULL COMMENT '更新时间', `ip` varchar(128) DEFAULT NULL COMMENT '服务器IP地址', PRIMARY KEY (`id`), - UNIQUE KEY `uidx01_server_info` (`ip`) + UNIQUE KEY `uidx01_server_info` (`ip`), + 
KEY `idx01_server_info` (`gmt_modified`) ) ENGINE = InnoDB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8mb4 @@ -204,7 +201,7 @@ CREATE TABLE `workflow_info` `gmt_create` datetime DEFAULT NULL COMMENT '创建时间', `gmt_modified` datetime DEFAULT NULL COMMENT '更新时间', PRIMARY KEY (`id`), - KEY `idx01_workflow_info` (`app_id`) + KEY `idx01_workflow_info` (`app_id`, `status`, `time_expression_type`, next_trigger_time) ) ENGINE = InnoDB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8mb4 @@ -231,9 +228,9 @@ CREATE TABLE `workflow_instance_info` `gmt_create` datetime DEFAULT NULL COMMENT '创建时间', `gmt_modified` datetime DEFAULT NULL COMMENT '更新时间', PRIMARY KEY (`id`), - unique index uidx01_wf_instance (wf_instance_id), - index idx01_wf_instance (workflow_id), - index idx02_wf_instance (app_id, status) + unique index uidx01_wf_instance (`wf_instance_id`), + index idx01_wf_instance (`workflow_id`, `status`), + index idx02_wf_instance (`app_id`, `status`, `expected_trigger_time`) ) ENGINE = InnoDB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8mb4 @@ -258,9 +255,7 @@ CREATE TABLE `workflow_node_info` `type` int DEFAULT NULL COMMENT '节点类型,1:任务JOB', `workflow_id` bigint DEFAULT NULL COMMENT '工作流ID', PRIMARY KEY (`id`), - KEY `idx01_workflow_node_info` (`app_id`), - KEY `idx02_workflow_node_info` (`workflow_id`), - KEY `idx03_workflow_node_info` (`job_id`) + KEY `idx01_workflow_node_info` (`workflow_id`,`gmt_create`) ) ENGINE = InnoDB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8mb4 diff --git a/powerjob-client/pom.xml b/powerjob-client/pom.xml index a285ba7a..94a23dc2 100644 --- a/powerjob-client/pom.xml +++ b/powerjob-client/pom.xml @@ -10,13 +10,13 @@ 4.0.0 powerjob-client - 4.2.0 + 4.2.1 jar - 5.6.1 + 5.9.1 1.2.83 - 4.2.0 + 4.2.1 3.2.4 diff --git a/powerjob-common/pom.xml b/powerjob-common/pom.xml index 444e579e..dcd8d499 100644 --- a/powerjob-common/pom.xml +++ b/powerjob-common/pom.xml @@ -10,18 +10,18 @@ 4.0.0 powerjob-common - 4.2.0 + 4.2.1 jar - 1.7.30 + 1.7.36 3.12.0 - 2.7 + 2.11.0 31.1-jre 
3.14.9 2.6.20 - 5.0.4 - 2.12.2 + 5.3.0 + 2.14.0-rc1 5.9.0 diff --git a/powerjob-common/src/main/java/tech/powerjob/common/PowerJobDKey.java b/powerjob-common/src/main/java/tech/powerjob/common/PowerJobDKey.java index 3118076f..45485189 100644 --- a/powerjob-common/src/main/java/tech/powerjob/common/PowerJobDKey.java +++ b/powerjob-common/src/main/java/tech/powerjob/common/PowerJobDKey.java @@ -35,6 +35,12 @@ public class PowerJobDKey { public static final String TRANSPORTER_KEEP_ALIVE_TIMEOUT = "powerjob.transporter.keepalive.timeout"; public static final String WORKER_STATUS_CHECK_PERIOD = "powerjob.worker.status-check.normal.period"; + + /** + * allowed PowerJob to invoke Thread#stop to kill a thread when PowerJob can't interrupt the thread + * It's VERY dangerous + */ + public static final String WORKER_ALLOWED_FORCE_STOP_THREAD = "powerjob.worker.allowed-force-stop-thread"; /** * ms */ diff --git a/powerjob-common/src/main/java/tech/powerjob/common/SystemInstanceResult.java b/powerjob-common/src/main/java/tech/powerjob/common/SystemInstanceResult.java index 0050e1fc..9c168664 100644 --- a/powerjob-common/src/main/java/tech/powerjob/common/SystemInstanceResult.java +++ b/powerjob-common/src/main/java/tech/powerjob/common/SystemInstanceResult.java @@ -26,6 +26,23 @@ public class SystemInstanceResult { * 任务执行超时 */ public static final String INSTANCE_EXECUTE_TIMEOUT = "instance execute timeout"; + /** + * 任务执行超时,成功打断任务 + */ + public static final String INSTANCE_EXECUTE_TIMEOUT_INTERRUPTED = "instance execute timeout,interrupted success"; + /** + * 任务执行超时,强制终止任务 + */ + public static final String INSTANCE_EXECUTE_TIMEOUT_FORCE_STOP= "instance execute timeout,force stop success"; + + /** + * 用户手动停止任务,成功打断任务 + */ + public static final String USER_STOP_INSTANCE_INTERRUPTED= "user stop instance,interrupted success"; + /** + * 用户手动停止任务,被系统强制终止 + */ + public static final String USER_STOP_INSTANCE_FORCE_STOP= "user stop instance,force stop success"; /** * 创建根任务失败 */ diff 
--git a/powerjob-common/src/main/java/tech/powerjob/common/enums/ExecuteType.java b/powerjob-common/src/main/java/tech/powerjob/common/enums/ExecuteType.java index 98864a5a..f84957b3 100644 --- a/powerjob-common/src/main/java/tech/powerjob/common/enums/ExecuteType.java +++ b/powerjob-common/src/main/java/tech/powerjob/common/enums/ExecuteType.java @@ -26,8 +26,8 @@ public enum ExecuteType { MAP_REDUCE(3, "MapReduce"), MAP(4, "Map"); - int v; - String des; + private final int v; + private final String des; public static ExecuteType of(int v) { for (ExecuteType type : values()) { diff --git a/powerjob-common/src/main/java/tech/powerjob/common/enums/TimeExpressionType.java b/powerjob-common/src/main/java/tech/powerjob/common/enums/TimeExpressionType.java index 30c10f2e..92d90d88 100644 --- a/powerjob-common/src/main/java/tech/powerjob/common/enums/TimeExpressionType.java +++ b/powerjob-common/src/main/java/tech/powerjob/common/enums/TimeExpressionType.java @@ -5,6 +5,7 @@ import lombok.AllArgsConstructor; import lombok.Getter; import lombok.ToString; +import java.util.Collections; import java.util.List; /** @@ -24,13 +25,13 @@ public enum TimeExpressionType { FIXED_DELAY(4), WORKFLOW(5); - int v; + private final int v; - public static final List FREQUENT_TYPES = Lists.newArrayList(FIXED_RATE.v, FIXED_DELAY.v); + public static final List FREQUENT_TYPES = Collections.unmodifiableList(Lists.newArrayList(FIXED_RATE.v, FIXED_DELAY.v)); /** * 首次计算触发时间时必须计算出一个有效值 */ - public static final List INSPECT_TYPES = Lists.newArrayList(CRON.v); + public static final List INSPECT_TYPES = Collections.unmodifiableList(Lists.newArrayList(CRON.v)); public static TimeExpressionType of(int v) { for (TimeExpressionType type : values()) { diff --git a/powerjob-common/src/main/java/tech/powerjob/common/enums/WorkflowInstanceStatus.java b/powerjob-common/src/main/java/tech/powerjob/common/enums/WorkflowInstanceStatus.java index 474cbc7c..751083ab 100644 --- 
a/powerjob-common/src/main/java/tech/powerjob/common/enums/WorkflowInstanceStatus.java +++ b/powerjob-common/src/main/java/tech/powerjob/common/enums/WorkflowInstanceStatus.java @@ -4,6 +4,7 @@ import com.google.common.collect.Lists; import lombok.AllArgsConstructor; import lombok.Getter; +import java.util.Collections; import java.util.List; /** @@ -27,11 +28,11 @@ public enum WorkflowInstanceStatus { /** * 广义的运行状态 */ - public static final List GENERALIZED_RUNNING_STATUS = Lists.newArrayList(WAITING.v, RUNNING.v); + public static final List GENERALIZED_RUNNING_STATUS = Collections.unmodifiableList(Lists.newArrayList(WAITING.v, RUNNING.v)); /** * 结束状态 */ - public static final List FINISHED_STATUS = Lists.newArrayList(FAILED.v, SUCCEED.v, STOPPED.v); + public static final List FINISHED_STATUS = Collections.unmodifiableList(Lists.newArrayList(FAILED.v, SUCCEED.v, STOPPED.v)); private final int v; diff --git a/powerjob-common/src/main/java/tech/powerjob/common/request/TaskTrackerReportInstanceStatusReq.java b/powerjob-common/src/main/java/tech/powerjob/common/request/TaskTrackerReportInstanceStatusReq.java index 8347d6ac..8545d2b0 100644 --- a/powerjob-common/src/main/java/tech/powerjob/common/request/TaskTrackerReportInstanceStatusReq.java +++ b/powerjob-common/src/main/java/tech/powerjob/common/request/TaskTrackerReportInstanceStatusReq.java @@ -46,6 +46,8 @@ public class TaskTrackerReportInstanceStatusReq implements PowerSerializable { private long startTime; + private Long endTime; + private long reportTime; private String sourceAddress; diff --git a/powerjob-common/src/main/java/tech/powerjob/common/request/WorkerHeartbeat.java b/powerjob-common/src/main/java/tech/powerjob/common/request/WorkerHeartbeat.java index 25800194..02d8d91f 100644 --- a/powerjob-common/src/main/java/tech/powerjob/common/request/WorkerHeartbeat.java +++ b/powerjob-common/src/main/java/tech/powerjob/common/request/WorkerHeartbeat.java @@ -17,26 +17,55 @@ import java.util.List; @Data public 
class WorkerHeartbeat implements PowerSerializable { - // 本机地址 -> IP:port + /** + * 本机地址 -> IP:port + */ private String workerAddress; - // 当前 appName + /** + * 当前 appName + */ private String appName; - // 当前 appId + /** + * 当前 appId + */ private Long appId; - // 当前时间 + /** + * 当前时间 + */ private long heartbeatTime; - // 当前加载的容器(容器名称 -> 容器版本) + /** + * 当前加载的容器(容器名称 -> 容器版本) + */ private List containerInfos; - // worker 版本信息 + /** + * worker 版本信息 + */ private String version; - // 使用的通讯协议 AKKA / HTTP + /** + * 使用的通讯协议 AKKA / HTTP + */ private String protocol; - // worker tag,标识同一个 worker 下的一类集群 ISSUE: 226 + /** + * worker tag,标识同一个 worker 下的一类集群 ISSUE: 226 + */ private String tag; - // 客户端名称 + /** + * 客户端名称 + */ private String client; - // 扩展字段 + /** + * 扩展字段 + */ private String extra; + /** + * 是否已经超载,超载的情况下 Server 一段时间内不会再向其派发任务 + */ + private boolean isOverload; + + private int lightTaskTrackerNum; + + private int heavyTaskTrackerNum; + private SystemMetrics systemMetrics; } diff --git a/powerjob-common/src/main/java/tech/powerjob/common/serialize/JsonUtils.java b/powerjob-common/src/main/java/tech/powerjob/common/serialize/JsonUtils.java index f68bdc6d..6809fa4c 100644 --- a/powerjob-common/src/main/java/tech/powerjob/common/serialize/JsonUtils.java +++ b/powerjob-common/src/main/java/tech/powerjob/common/serialize/JsonUtils.java @@ -4,40 +4,44 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.extern.slf4j.Slf4j; import tech.powerjob.common.exception.PowerJobException; import org.apache.commons.lang3.exception.ExceptionUtils; +import java.io.IOException; + /** * JSON工具类 * * @author tjq * @since 2020/4/16 */ +@Slf4j public class JsonUtils { - private static final ObjectMapper objectMapper = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); static { 
- objectMapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true); + OBJECT_MAPPER.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true); + // + OBJECT_MAPPER.configure(JsonParser.Feature.IGNORE_UNDEFINED, true); + } + + private JsonUtils(){ + } public static String toJSONString(Object obj) { - if (obj instanceof String) { - return (String) obj; - } try { - return objectMapper.writeValueAsString(obj); + return OBJECT_MAPPER.writeValueAsString(obj); }catch (Exception ignore) { } return null; } public static String toJSONStringUnsafe(Object obj) { - if (obj instanceof String) { - return (String) obj; - } try { - return objectMapper.writeValueAsString(obj); + return OBJECT_MAPPER.writeValueAsString(obj); }catch (Exception e) { throw new PowerJobException(e); } @@ -45,27 +49,41 @@ public class JsonUtils { public static byte[] toBytes(Object obj) { try { - return objectMapper.writeValueAsBytes(obj); + return OBJECT_MAPPER.writeValueAsBytes(obj); }catch (Exception ignore) { } return null; } public static T parseObject(String json, Class clz) throws JsonProcessingException { - return objectMapper.readValue(json, clz); + return OBJECT_MAPPER.readValue(json, clz); } - public static T parseObject(byte[] b, Class clz) throws Exception { - return objectMapper.readValue(b, clz); + public static T parseObject(byte[] b, Class clz) throws IOException { + return OBJECT_MAPPER.readValue(b, clz); } - public static T parseObject(byte[] b, TypeReference typeReference) throws Exception { - return objectMapper.readValue(b, typeReference); + public static T parseObject(byte[] b, TypeReference typeReference) throws IOException { + return OBJECT_MAPPER.readValue(b, typeReference); + } + + public static T parseObject(String json, TypeReference typeReference) throws IOException { + return OBJECT_MAPPER.readValue(json, typeReference); + } + + public static T parseObjectIgnoreException(String json, Class clz) { + try { + return OBJECT_MAPPER.readValue(json, clz); + }catch (Exception e) { 
+ log.error("unable to parse json string to object,current string:{}",json,e); + return null; + } + } public static T parseObjectUnsafe(String json, Class clz) { try { - return objectMapper.readValue(json, clz); + return OBJECT_MAPPER.readValue(json, clz); }catch (Exception e) { ExceptionUtils.rethrow(e); } diff --git a/powerjob-common/src/main/java/tech/powerjob/common/utils/JavaUtils.java b/powerjob-common/src/main/java/tech/powerjob/common/utils/JavaUtils.java new file mode 100644 index 00000000..4b0b0c24 --- /dev/null +++ b/powerjob-common/src/main/java/tech/powerjob/common/utils/JavaUtils.java @@ -0,0 +1,57 @@ +package tech.powerjob.common.utils; + +import lombok.extern.slf4j.Slf4j; + +import java.io.File; +import java.io.IOException; +import java.net.JarURLConnection; +import java.net.URL; +import java.net.URLConnection; +import java.security.CodeSource; +import java.util.jar.Attributes; +import java.util.jar.JarFile; + +/** + * Java 语言相关的工具 + * + * @author tjq + * @since 2022/10/23 + */ +@Slf4j +public class JavaUtils { + + /** + * 获取类所在 Jar 包的版本 + * @param clz 类 + * @return 包版本 + */ + public static String determinePackageVersion(Class clz) { + try { + + String implementationVersion = clz.getPackage().getImplementationVersion(); + if (implementationVersion != null) { + return implementationVersion; + } + CodeSource codeSource = clz.getProtectionDomain().getCodeSource(); + if (codeSource == null) { + return null; + } + URL codeSourceLocation = codeSource.getLocation(); + + URLConnection connection = codeSourceLocation.openConnection(); + if (connection instanceof JarURLConnection) { + return getImplementationVersion(((JarURLConnection) connection).getJarFile()); + } + try (JarFile jarFile = new JarFile(new File(codeSourceLocation.toURI()))) { + return getImplementationVersion(jarFile); + } + } + catch (Throwable t) { + log.warn("[JavaUtils] determinePackageVersion for clz[{}] failed, msg: {}", clz.getSimpleName(), t.toString()); + } + return null; + } + 
private static String getImplementationVersion(JarFile jarFile) throws IOException { + return jarFile.getManifest().getMainAttributes().getValue(Attributes.Name.IMPLEMENTATION_VERSION); + } +} diff --git a/powerjob-common/src/test/java/tech/powerjob/common/utils/JavaUtilsTest.java b/powerjob-common/src/test/java/tech/powerjob/common/utils/JavaUtilsTest.java new file mode 100644 index 00000000..4e50b79e --- /dev/null +++ b/powerjob-common/src/test/java/tech/powerjob/common/utils/JavaUtilsTest.java @@ -0,0 +1,25 @@ +package tech.powerjob.common.utils; + +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.Test; +import org.slf4j.LoggerFactory; + +import static org.junit.jupiter.api.Assertions.*; + + +/** + * Java 语言相关的工具测试 + * + * @author tjq + * @since 2022/10/23 + */ +@Slf4j +class JavaUtilsTest { + + @Test + void determinePackageVersion() { + + String packageVersion = JavaUtils.determinePackageVersion(LoggerFactory.class); + log.info("[determinePackageVersion] LoggerFactory's package version: {}", packageVersion); + } +} \ No newline at end of file diff --git a/powerjob-official-processors/pom.xml b/powerjob-official-processors/pom.xml index fcef544d..41229d45 100644 --- a/powerjob-official-processors/pom.xml +++ b/powerjob-official-processors/pom.xml @@ -18,18 +18,18 @@ 3.2.4 - 5.6.1 - 1.2.3 - 4.2.0 + 5.9.1 + 1.2.9 + 4.2.1 5.2.9.RELEASE - 1.4.200 + 2.1.214 8.0.28 1.2.83 3.14.9 30.1.1-jre - 2.6 + 2.11.0 3.10 diff --git a/powerjob-official-processors/src/main/java/tech/powerjob/official/processors/impl/sql/SpringDatasourceSqlProcessor.java b/powerjob-official-processors/src/main/java/tech/powerjob/official/processors/impl/sql/SpringDatasourceSqlProcessor.java index 6d2bce94..120feb54 100644 --- a/powerjob-official-processors/src/main/java/tech/powerjob/official/processors/impl/sql/SpringDatasourceSqlProcessor.java +++ b/powerjob-official-processors/src/main/java/tech/powerjob/official/processors/impl/sql/SpringDatasourceSqlProcessor.java @@ -4,7 +4,7 @@ 
import tech.powerjob.worker.core.processor.TaskContext; import com.google.common.collect.Maps; import lombok.extern.slf4j.Slf4j; import org.springframework.util.Assert; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import javax.sql.DataSource; import java.sql.Connection; diff --git a/powerjob-server/pom.xml b/powerjob-server/pom.xml index 1848a53d..37f64bbc 100644 --- a/powerjob-server/pom.xml +++ b/powerjob-server/pom.xml @@ -10,7 +10,7 @@ 4.0.0 powerjob-server - 4.2.0 + 4.2.1 pom @@ -28,19 +28,19 @@ 2.9.2 2.7.4 - 4.2.0 + 4.2.1 - 8.0.28 + 8.0.30 19.7.0.0 7.4.1.jre8 11.5.0.0 42.2.14 - 1.4.200 + 2.1.214 - 2.5.2 + 2.11.2 5.7.0.202003110725-r 3.0.1 - 3.6 + 3.8.0 1.2.83 1.0.1 4.0.2 @@ -48,6 +48,8 @@ true + 3.0.10 + 9.1.6 @@ -257,7 +259,7 @@ com.cronutils cron-utils - 9.1.6 + ${cron-utils.version} @@ -277,13 +279,13 @@ org.codehaus.groovy groovy-jsr223 - 3.0.10 + ${groovy.version} org.codehaus.groovy groovy-json - 3.0.10 + ${groovy.version} diff --git a/powerjob-server/powerjob-server-common/pom.xml b/powerjob-server/powerjob-server-common/pom.xml index 3b6bd0ec..227e65b1 100644 --- a/powerjob-server/powerjob-server-common/pom.xml +++ b/powerjob-server/powerjob-server-common/pom.xml @@ -5,7 +5,7 @@ powerjob-server tech.powerjob - 4.2.0 + 4.2.1 ../pom.xml 4.0.0 diff --git a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/Holder.java b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/Holder.java new file mode 100644 index 00000000..2128fd43 --- /dev/null +++ b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/Holder.java @@ -0,0 +1,23 @@ +package tech.powerjob.server.common; + + +/** + * @author Echo009 + * @since 2022/10/2 + */ +public class Holder { + + private T value; + + public Holder(T value) { + this.value = value; + } + + public T get() { + return value; + } + + public void set(T value) { + this.value = value; + } +} diff 
--git a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/module/WorkerInfo.java b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/module/WorkerInfo.java index e3e401f1..c16d2384 100644 --- a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/module/WorkerInfo.java +++ b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/module/WorkerInfo.java @@ -1,9 +1,10 @@ package tech.powerjob.server.common.module; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; import tech.powerjob.common.model.DeployedContainerInfo; import tech.powerjob.common.model.SystemMetrics; import tech.powerjob.common.request.WorkerHeartbeat; -import lombok.Data; import java.util.List; @@ -14,6 +15,7 @@ import java.util.List; * @since 2021/2/7 */ @Data +@Slf4j public class WorkerInfo { private String address; @@ -26,6 +28,14 @@ public class WorkerInfo { private String tag; + private int lightTaskTrackerNum; + + private int heavyTaskTrackerNum; + + private long lastOverloadTime; + + private boolean overloading; + private SystemMetrics systemMetrics; private List containerInfos; @@ -40,10 +50,25 @@ public class WorkerInfo { tag = workerHeartbeat.getTag(); systemMetrics = workerHeartbeat.getSystemMetrics(); containerInfos = workerHeartbeat.getContainerInfos(); + + lightTaskTrackerNum = workerHeartbeat.getLightTaskTrackerNum(); + heavyTaskTrackerNum = workerHeartbeat.getHeavyTaskTrackerNum(); + + if (workerHeartbeat.isOverload()) { + overloading = true; + lastOverloadTime = workerHeartbeat.getHeartbeatTime(); + log.warn("[WorkerInfo] worker {} is overload!", getAddress()); + } else { + overloading = false; + } } public boolean timeout() { long timeout = System.currentTimeMillis() - lastActiveTime; return timeout > WORKER_TIMEOUT_MS; } + + public boolean overload() { + return overloading; + } } diff --git 
a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/thread/NewThreadRunRejectedExecutionHandler.java b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/thread/NewThreadRunRejectedExecutionHandler.java new file mode 100644 index 00000000..9f5912fc --- /dev/null +++ b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/thread/NewThreadRunRejectedExecutionHandler.java @@ -0,0 +1,33 @@ +package tech.powerjob.server.common.thread; + +import lombok.extern.slf4j.Slf4j; + +import java.util.concurrent.RejectedExecutionHandler; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.atomic.AtomicLong; + +/** + * @author Echo009 + * @since 2022/10/12 + */ +@Slf4j +public class NewThreadRunRejectedExecutionHandler implements RejectedExecutionHandler { + + private static final AtomicLong COUNTER = new AtomicLong(); + + private final String source; + + public NewThreadRunRejectedExecutionHandler(String source) { + this.source = source; + } + + @Override + public void rejectedExecution(Runnable r, ThreadPoolExecutor p) { + log.error("[{}] ThreadPool[{}] overload, the task[{}] will run by a new thread!, Maybe you need to adjust the ThreadPool config!", source, p, r); + if (!p.isShutdown()) { + String threadName = source + "-T-" + COUNTER.getAndIncrement(); + log.info("[{}] create new thread[{}] to run job", source, threadName); + new Thread(r, threadName).start(); + } + } +} diff --git a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/HashedWheelTimer.java b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/HashedWheelTimer.java index ca1662e3..840d2d84 100644 --- a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/HashedWheelTimer.java +++ 
b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/HashedWheelTimer.java @@ -65,9 +65,11 @@ public class HashedWheelTimer implements Timer { taskProcessPool = null; }else { ThreadFactory threadFactory = new ThreadFactoryBuilder().setNameFormat("HashedWheelTimer-Executor-%d").build(); - BlockingQueue queue = Queues.newLinkedBlockingQueue(16); + // 这里需要调整一下队列大小 + BlockingQueue queue = Queues.newLinkedBlockingQueue(8192); int core = Math.max(Runtime.getRuntime().availableProcessors(), processThreadNum); - taskProcessPool = new ThreadPoolExecutor(core, 4 * core, + // 基本都是 io 密集型任务 + taskProcessPool = new ThreadPoolExecutor(core, 2 * core, 60, TimeUnit.SECONDS, queue, threadFactory, RejectedExecutionHandlerFactory.newCallerRun("PowerJobTimeWheelPool")); } diff --git a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/holder/HashedWheelTimerHolder.java b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/holder/HashedWheelTimerHolder.java index 5d73a46a..f8abc5be 100644 --- a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/holder/HashedWheelTimerHolder.java +++ b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/holder/HashedWheelTimerHolder.java @@ -11,7 +11,9 @@ import tech.powerjob.server.common.timewheel.Timer; */ public class HashedWheelTimerHolder { - // 非精确时间轮,每 5S 走一格 + /** + * 非精确时间轮,每 5S 走一格 + */ public static final Timer INACCURATE_TIMER = new HashedWheelTimer(5000, 16, 0); private HashedWheelTimerHolder() { diff --git a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/holder/InstanceTimeWheelService.java b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/holder/InstanceTimeWheelService.java index 35d06d4c..d38d53d8 100644 --- 
a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/holder/InstanceTimeWheelService.java +++ b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/timewheel/holder/InstanceTimeWheelService.java @@ -19,14 +19,22 @@ public class InstanceTimeWheelService { private static final Map CARGO = Maps.newConcurrentMap(); - // 精确调度时间轮,每 1MS 走一格 + /** + * 精确调度时间轮,每 1MS 走一格 + */ private static final Timer TIMER = new HashedWheelTimer(1, 4096, Runtime.getRuntime().availableProcessors() * 4); - // 非精确调度时间轮,用于处理高延迟任务,每 10S 走一格 + /** + * 非精确调度时间轮,用于处理高延迟任务,每 10S 走一格 + */ private static final Timer SLOW_TIMER = new HashedWheelTimer(10000, 12, 0); - // 支持取消的时间间隔,低于该阈值则不会放进 CARGO + /** + * 支持取消的时间间隔,低于该阈值则不会放进 CARGO + */ private static final long MIN_INTERVAL_MS = 1000; - // 长延迟阈值 + /** + * 长延迟阈值 + */ private static final long LONG_DELAY_THRESHOLD_MS = 60000; /** diff --git a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/utils/AOPUtils.java b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/utils/AOPUtils.java index 3d06dd64..66d98f14 100644 --- a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/utils/AOPUtils.java +++ b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/utils/AOPUtils.java @@ -25,8 +25,8 @@ import java.lang.reflect.Method; @Slf4j public class AOPUtils { - private static final ExpressionParser parser = new SpelExpressionParser(); - private static final ParameterNameDiscoverer discoverer = new LocalVariableTableParameterNameDiscoverer(); + private static final ExpressionParser PARSER = new SpelExpressionParser(); + private static final ParameterNameDiscoverer DISCOVERER = new LocalVariableTableParameterNameDiscoverer(); public static String parseRealClassName(JoinPoint joinPoint) { return joinPoint.getSignature().getDeclaringType().getSimpleName(); @@ -50,7 +50,7 @@ public 
class AOPUtils { } public static T parseSpEl(Method method, Object[] arguments, String spEl, Class clazz, T defaultResult) { - String[] params = discoverer.getParameterNames(method); + String[] params = DISCOVERER.getParameterNames(method); assert params != null; EvaluationContext context = new StandardEvaluationContext(); @@ -58,7 +58,7 @@ public class AOPUtils { context.setVariable(params[len], arguments[len]); } try { - Expression expression = parser.parseExpression(spEl); + Expression expression = PARSER.parseExpression(spEl); return expression.getValue(context, clazz); } catch (Exception e) { log.error("[AOPUtils] parse SpEL failed for method[{}], please concat @tjq to fix the bug!", method.getName(), e); diff --git a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/utils/TimeUtils.java b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/utils/TimeUtils.java index 09df140f..ea6e388b 100644 --- a/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/utils/TimeUtils.java +++ b/powerjob-server/powerjob-server-common/src/main/java/tech/powerjob/server/common/utils/TimeUtils.java @@ -19,9 +19,13 @@ import java.util.List; @Slf4j public class TimeUtils { - // NTP 授时服务器(阿里云 -> 交大 -> 水果) + /** + * NTP 授时服务器(阿里云 -> 交大 -> 水果) + */ private static final List NTP_SERVER_LIST = Lists.newArrayList("ntp.aliyun.com", "ntp.sjtu.edu.cn", "time1.apple.com"); - // 最大误差 5S + /** + * 最大误差 5S + */ private static final long MAX_OFFSET = 5000; public static void check() throws TimeCheckException { diff --git a/powerjob-server/powerjob-server-core/pom.xml b/powerjob-server/powerjob-server-core/pom.xml index 5e5c40f6..2a3e13ea 100644 --- a/powerjob-server/powerjob-server-core/pom.xml +++ b/powerjob-server/powerjob-server-core/pom.xml @@ -5,7 +5,7 @@ powerjob-server tech.powerjob - 4.2.0 + 4.2.1 ../pom.xml 4.0.0 diff --git 
a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/DispatchService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/DispatchService.java index db0183f7..4a80cf87 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/DispatchService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/DispatchService.java @@ -1,9 +1,18 @@ package tech.powerjob.server.core; +import com.google.common.collect.Lists; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.BeanUtils; +import org.springframework.stereotype.Service; +import org.springframework.util.CollectionUtils; +import org.springframework.util.StringUtils; import tech.powerjob.common.RemoteConstant; import tech.powerjob.common.SystemInstanceResult; import tech.powerjob.common.enums.*; import tech.powerjob.common.request.ServerScheduleJobReq; +import tech.powerjob.server.common.Holder; +import tech.powerjob.server.common.module.WorkerInfo; import tech.powerjob.server.core.instance.InstanceManager; import tech.powerjob.server.core.instance.InstanceMetadataService; import tech.powerjob.server.core.lock.UseCacheLock; @@ -12,18 +21,11 @@ import tech.powerjob.server.persistence.remote.model.JobInfoDO; import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository; import tech.powerjob.server.remote.transport.TransportService; import tech.powerjob.server.remote.worker.WorkerClusterQueryService; -import tech.powerjob.server.common.module.WorkerInfo; -import com.google.common.collect.Lists; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.BeanUtils; -import org.springframework.stereotype.Service; -import org.springframework.util.CollectionUtils; -import org.springframework.util.StringUtils; -import javax.annotation.Resource; +import java.util.ArrayList; import java.util.Date; import java.util.List; -import 
java.util.concurrent.ThreadLocalRandom; +import java.util.Optional; import java.util.stream.Collectors; import static tech.powerjob.common.enums.InstanceStatus.*; @@ -38,35 +40,39 @@ import static tech.powerjob.common.enums.InstanceStatus.*; */ @Slf4j @Service +@RequiredArgsConstructor public class DispatchService { - @Resource - private TransportService transportService; - @Resource - private WorkerClusterQueryService workerClusterQueryService; - @Resource - private InstanceManager instanceManager; - @Resource - private InstanceMetadataService instanceMetadataService; - @Resource - private InstanceInfoRepository instanceInfoRepository; + private final TransportService transportService; + + private final WorkerClusterQueryService workerClusterQueryService; + + private final InstanceManager instanceManager; + + private final InstanceMetadataService instanceMetadataService; + + private final InstanceInfoRepository instanceInfoRepository; /** - * 重新派发任务实例(不考虑实例当前的状态) + * 异步重新派发 * - * @param jobInfo 任务信息(注意,这里传入的任务信息有可能为“空”) - * @param instanceId 实例ID + * @param instanceId 实例 ID */ - @UseCacheLock(type = "processJobInstance", key = "#jobInfo.getMaxInstanceNum() > 0 || T(tech.powerjob.common.enums.TimeExpressionType).FREQUENT_TYPES.contains(#jobInfo.getTimeExpressionType()) ? 
#jobInfo.getId() : #instanceId", concurrencyLevel = 1024) - public void redispatch(JobInfoDO jobInfo, Long instanceId) { - InstanceInfoDO instance = instanceInfoRepository.findByInstanceId(instanceId); + @UseCacheLock(type = "processJobInstance", key = "#instanceId", concurrencyLevel = 1024) + public void redispatchAsync(Long instanceId, int originStatus) { // 将状态重置为等待派发 - instance.setStatus(InstanceStatus.WAITING_DISPATCH.getV()); - instance.setGmtModified(new Date()); - instanceInfoRepository.saveAndFlush(instance); - dispatch(jobInfo, instanceId); + instanceInfoRepository.updateStatusAndGmtModifiedByInstanceIdAndOriginStatus(instanceId, originStatus, InstanceStatus.WAITING_DISPATCH.getV(), new Date()); } + /** + * 异步批量重新派发,不加锁 + */ + public void redispatchBatchAsyncLockFree(List instanceIdList, int originStatus) { + // 将状态重置为等待派发 + instanceInfoRepository.updateStatusAndGmtModifiedByInstanceIdListAndOriginStatus(instanceIdList, originStatus, InstanceStatus.WAITING_DISPATCH.getV(), new Date()); + } + + /** * 将任务从Server派发到Worker(TaskTracker) * 只会派发当前状态为等待派发的任务实例 @@ -78,13 +84,16 @@ public class DispatchService { * 迁移至 {@link InstanceManager#updateStatus} 中处理 * ************************************************** * - * @param jobInfo 任务的元信息 - * @param instanceId 任务实例ID + * @param jobInfo 任务的元信息 + * @param instanceId 任务实例ID + * @param instanceInfoOptional 任务实例信息,可选 + * @param overloadOptional 超载信息,可选 */ @UseCacheLock(type = "processJobInstance", key = "#jobInfo.getMaxInstanceNum() > 0 || T(tech.powerjob.common.enums.TimeExpressionType).FREQUENT_TYPES.contains(#jobInfo.getTimeExpressionType()) ? 
#jobInfo.getId() : #instanceId", concurrencyLevel = 1024) - public void dispatch(JobInfoDO jobInfo, Long instanceId) { + public void dispatch(JobInfoDO jobInfo, Long instanceId, Optional instanceInfoOptional, Optional> overloadOptional) { + // 允许从外部传入实例信息,减少 io 次数 // 检查当前任务是否被取消 - InstanceInfoDO instanceInfo = instanceInfoRepository.findByInstanceId(instanceId); + InstanceInfoDO instanceInfo = instanceInfoOptional.orElseGet(() -> instanceInfoRepository.findByInstanceId(instanceId)); Long jobId = instanceInfo.getJobId(); if (CANCELED.getV() == instanceInfo.getStatus()) { log.info("[Dispatcher-{}|{}] cancel dispatch due to instance has been canceled", jobId, instanceId); @@ -125,7 +134,6 @@ public class DispatchService { String result = String.format(SystemInstanceResult.TOO_MANY_INSTANCES, runningInstanceCount, maxInstanceNum); log.warn("[Dispatcher-{}|{}] cancel dispatch job due to too much instance is running ({} > {}).", jobId, instanceId, runningInstanceCount, maxInstanceNum); instanceInfoRepository.update4TriggerFailed(instanceId, FAILED.getV(), current, current, RemoteConstant.EMPTY_ADDRESS, result, now); - instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), FAILED, result); return; } @@ -141,8 +149,15 @@ public class DispatchService { instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), FAILED, SystemInstanceResult.NO_WORKER_AVAILABLE); return; } + // 判断是否超载,在所有可用 worker 超载的情况下直接跳过当前任务 + suitableWorkers = filterOverloadWorker(suitableWorkers); + if (suitableWorkers.isEmpty()) { + // 直接取消派发,减少一次数据库 io + overloadOptional.ifPresent(booleanHolder -> booleanHolder.set(true)); + log.warn("[Dispatcher-{}|{}] cancel to dispatch job due to all worker is overload", jobId, instanceId); + return; + } List workerIpList = suitableWorkers.stream().map(WorkerInfo::getAddress).collect(Collectors.toList()); - // 构造任务调度请求 ServerScheduleJobReq req = constructServerScheduleJobReq(jobInfo, instanceInfo, workerIpList); 
@@ -154,12 +169,23 @@ public class DispatchService { log.info("[Dispatcher-{}|{}] send schedule request to TaskTracker[protocol:{},address:{}] successfully: {}.", jobId, instanceId, taskTracker.getProtocol(), taskTrackerAddress, req); // 修改状态 - instanceInfoRepository.update4TriggerSucceed(instanceId, WAITING_WORKER_RECEIVE.getV(), current, taskTrackerAddress, now); - + instanceInfoRepository.update4TriggerSucceed(instanceId, WAITING_WORKER_RECEIVE.getV(), current, taskTrackerAddress, now, instanceInfo.getStatus()); // 装载缓存 instanceMetadataService.loadJobInfo(instanceId, jobInfo); } + private List filterOverloadWorker(List suitableWorkers) { + + List res = new ArrayList<>(suitableWorkers.size()); + for (WorkerInfo suitableWorker : suitableWorkers) { + if (suitableWorker.overload()){ + continue; + } + res.add(suitableWorker); + } + return res; + } + /** * 构造任务调度请求 */ diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/container/ContainerService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/container/ContainerService.java index de2ffa47..56dc22ee 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/container/ContainerService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/container/ContainerService.java @@ -43,7 +43,7 @@ import org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider; import org.springframework.core.env.Environment; import org.springframework.stereotype.Service; import org.springframework.util.CollectionUtils; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.springframework.web.multipart.MultipartFile; import javax.annotation.Resource; @@ -84,7 +84,9 @@ public class ContainerService { // 并发部署的机器数量 private static final int DEPLOY_BATCH_NUM = 50; // 部署间隔 - private static final long DEPLOY_MIN_INTERVAL = 10 * 60 * 1000; + private static final 
long DEPLOY_MIN_INTERVAL = 10 * 60 * 1000L; + // 最长部署时间 + private static final long DEPLOY_MAX_COST_TIME = 10 * 60 * 1000L; /** * 保存容器 @@ -208,14 +210,13 @@ public class ContainerService { String deployLock = "containerDeployLock-" + containerId; RemoteEndpoint.Async remote = session.getAsyncRemote(); // 最长部署时间:10分钟 - boolean lock = lockService.tryLock(deployLock, 10 * 60 * 1000); + boolean lock = lockService.tryLock(deployLock, DEPLOY_MAX_COST_TIME); if (!lock) { remote.sendText("SYSTEM: acquire deploy lock failed, maybe other user is deploying, please wait until the running deploy task finished."); return; } try { - Optional containerInfoOpt = containerInfoRepository.findById(containerId); if (!containerInfoOpt.isPresent()) { remote.sendText("SYSTEM: can't find container by id: " + containerId); diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/AbWorkerRequestHandler.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/AbWorkerRequestHandler.java index f1b68eec..b0e8047c 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/AbWorkerRequestHandler.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/AbWorkerRequestHandler.java @@ -1,5 +1,6 @@ package tech.powerjob.server.core.handler; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.exception.ExceptionUtils; import org.springframework.beans.BeanUtils; @@ -22,7 +23,6 @@ import tech.powerjob.server.persistence.remote.repository.ContainerInfoRepositor import tech.powerjob.server.persistence.remote.repository.JobInfoRepository; import tech.powerjob.server.remote.worker.WorkerClusterQueryService; -import javax.annotation.Resource; import java.util.List; import java.util.Optional; import java.util.concurrent.RejectedExecutionException; @@ -34,17 +34,18 @@ import java.util.stream.Collectors; * 
@author tjq * @since 2022/9/11 */ +@RequiredArgsConstructor @Slf4j public abstract class AbWorkerRequestHandler implements IWorkerRequestHandler { - @Resource - protected MonitorService monitorService; - @Resource - protected Environment environment; - @Resource - protected ContainerInfoRepository containerInfoRepository; - @Resource - private WorkerClusterQueryService workerClusterQueryService; + + protected final MonitorService monitorService; + + protected final Environment environment; + + protected final ContainerInfoRepository containerInfoRepository; + + private final WorkerClusterQueryService workerClusterQueryService; protected abstract void processWorkerHeartbeat0(WorkerHeartbeat heartbeat, WorkerHeartbeatEvent event); diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/WorkerRequestHandlerHolder.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/WorkerRequestHandlerHolder.java index ac259e03..f9c267cc 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/WorkerRequestHandlerHolder.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/WorkerRequestHandlerHolder.java @@ -1,9 +1,7 @@ package tech.powerjob.server.core.handler; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; -import javax.annotation.Resource; /** * WorkerRequestHandlerHolder @@ -16,13 +14,14 @@ public class WorkerRequestHandlerHolder { private static IWorkerRequestHandler workerRequestHandler; + public WorkerRequestHandlerHolder(IWorkerRequestHandler injectedWorkerRequestHandler) { + workerRequestHandler = injectedWorkerRequestHandler; + } public static IWorkerRequestHandler fetchWorkerRequestHandler() { + if (workerRequestHandler == null){ + throw new IllegalStateException("WorkerRequestHandlerHolder not initialized!"); + } return workerRequestHandler; } - - 
@Autowired - public void setWorkerRequestHandler(IWorkerRequestHandler workerRequestHandler) { - WorkerRequestHandlerHolder.workerRequestHandler = workerRequestHandler; - } } diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/WorkerRequestHandlerImpl.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/WorkerRequestHandlerImpl.java index 77e573be..ed4e9206 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/WorkerRequestHandlerImpl.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/handler/WorkerRequestHandlerImpl.java @@ -1,7 +1,7 @@ package tech.powerjob.server.core.handler; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.BeanUtils; +import org.springframework.core.env.Environment; import org.springframework.stereotype.Component; import org.springframework.util.CollectionUtils; import tech.powerjob.common.enums.InstanceStatus; @@ -12,12 +12,14 @@ import tech.powerjob.common.response.AskResponse; import tech.powerjob.server.core.instance.InstanceLogService; import tech.powerjob.server.core.instance.InstanceManager; import tech.powerjob.server.core.workflow.WorkflowInstanceManager; +import tech.powerjob.server.monitor.MonitorService; import tech.powerjob.server.monitor.events.w2s.TtReportInstanceStatusEvent; import tech.powerjob.server.monitor.events.w2s.WorkerHeartbeatEvent; import tech.powerjob.server.monitor.events.w2s.WorkerLogReportEvent; +import tech.powerjob.server.persistence.remote.repository.ContainerInfoRepository; import tech.powerjob.server.remote.worker.WorkerClusterManagerService; +import tech.powerjob.server.remote.worker.WorkerClusterQueryService; -import javax.annotation.Resource; import java.util.Optional; /** @@ -30,12 +32,19 @@ import java.util.Optional; @Component public class WorkerRequestHandlerImpl extends AbWorkerRequestHandler { - @Resource - private 
InstanceManager instanceManager; - @Resource - private WorkflowInstanceManager workflowInstanceManager; - @Resource - private InstanceLogService instanceLogService; + private final InstanceManager instanceManager; + + private final WorkflowInstanceManager workflowInstanceManager; + + private final InstanceLogService instanceLogService; + + public WorkerRequestHandlerImpl(InstanceManager instanceManager, WorkflowInstanceManager workflowInstanceManager, InstanceLogService instanceLogService, + MonitorService monitorService, Environment environment, ContainerInfoRepository containerInfoRepository, WorkerClusterQueryService workerClusterQueryService) { + super(monitorService, environment, containerInfoRepository, workerClusterQueryService); + this.instanceManager = instanceManager; + this.workflowInstanceManager = workflowInstanceManager; + this.instanceLogService = instanceLogService; + } @Override protected void processWorkerHeartbeat0(WorkerHeartbeat heartbeat, WorkerHeartbeatEvent event) { diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceLogService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceLogService.java index 9f75587a..5adfe8d7 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceLogService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceLogService.java @@ -56,6 +56,7 @@ public class InstanceLogService { @Resource private InstanceMetadataService instanceMetadataService; + @Resource private GridFsManager gridFsManager; /** @@ -63,6 +64,7 @@ public class InstanceLogService { */ @Resource(name = "localTransactionTemplate") private TransactionTemplate localTransactionTemplate; + @Resource private LocalInstanceLogRepository localInstanceLogRepository; diff --git 
a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceManager.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceManager.java index 351fbf9f..40f066e1 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceManager.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceManager.java @@ -1,9 +1,10 @@ package tech.powerjob.server.core.instance; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.BeanUtils; import org.springframework.stereotype.Service; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import tech.powerjob.common.enums.InstanceStatus; import tech.powerjob.common.enums.Protocol; import tech.powerjob.common.enums.TimeExpressionType; @@ -39,22 +40,22 @@ import java.util.concurrent.TimeUnit; */ @Slf4j @Service +@RequiredArgsConstructor public class InstanceManager { - @Resource - private AlarmCenter alarmCenter; - @Resource - private InstanceLogService instanceLogService; - @Resource - private InstanceMetadataService instanceMetadataService; - @Resource - private InstanceInfoRepository instanceInfoRepository; - @Resource - private WorkflowInstanceManager workflowInstanceManager; - @Resource - private TransportService transportService; - @Resource - private WorkerClusterQueryService workerClusterQueryService; + private final AlarmCenter alarmCenter; + + private final InstanceLogService instanceLogService; + + private final InstanceMetadataService instanceMetadataService; + + private final InstanceInfoRepository instanceInfoRepository; + + private final WorkflowInstanceManager workflowInstanceManager; + + private final TransportService transportService; + + private final WorkerClusterQueryService workerClusterQueryService; /** * 更新任务状态 @@ -69,7 +70,6 @@ public class 
InstanceManager { public void updateStatus(TaskTrackerReportInstanceStatusReq req) throws ExecutionException { Long instanceId = req.getInstanceId(); - // 获取相关数据 JobInfoDO jobInfo = instanceMetadataService.fetchJobInfoByInstanceId(req.getInstanceId()); InstanceInfoDO instanceInfo = instanceInfoRepository.findByInstanceId(instanceId); @@ -77,6 +77,7 @@ public class InstanceManager { log.warn("[InstanceManager-{}] can't find InstanceInfo from database", instanceId); return; } + int originStatus = instanceInfo.getStatus(); // 丢弃过期的上报数据 if (req.getReportTime() <= instanceInfo.getLastReportTime()) { log.warn("[InstanceManager-{}] receive the expired status report request: {}, this report will be dropped.", instanceId, req); @@ -134,8 +135,7 @@ public class InstanceManager { boolean finished = false; if (receivedInstanceStatus == InstanceStatus.SUCCEED) { instanceInfo.setResult(req.getResult()); - instanceInfo.setFinishedTime(System.currentTimeMillis()); - + instanceInfo.setFinishedTime(req.getEndTime() == null ? System.currentTimeMillis() : req.getEndTime()); finished = true; } else if (receivedInstanceStatus == InstanceStatus.FAILED) { @@ -152,21 +152,23 @@ public class InstanceManager { instanceInfo.setStatus(InstanceStatus.WAITING_DISPATCH.getV()); } else { instanceInfo.setResult(req.getResult()); - instanceInfo.setFinishedTime(System.currentTimeMillis()); + instanceInfo.setFinishedTime(req.getEndTime() == null ? 
System.currentTimeMillis() : req.getEndTime()); finished = true; log.info("[InstanceManager-{}] instance execute failed and have no chance to retry.", instanceId); } } - - // 同步状态变更信息到数据库 - instanceInfoRepository.saveAndFlush(instanceInfo); - if (finished) { + // 最终状态允许直接覆盖更新 + instanceInfoRepository.saveAndFlush(instanceInfo); // 这里的 InstanceStatus 只有 成功/失败 两种,手动停止不会由 TaskTracker 上报 processFinishedInstance(instanceId, req.getWfInstanceId(), receivedInstanceStatus, req.getResult()); - + return; + } + // 带条件更新 + final int i = instanceInfoRepository.updateStatusChangeInfoByInstanceIdAndStatus(instanceInfo.getLastReportTime(), instanceInfo.getGmtModified(), instanceInfo.getRunningTimes(), instanceInfo.getStatus(), instanceInfo.getInstanceId(), originStatus); + if (i == 0) { + log.warn("[InstanceManager-{}] update instance status failed, maybe the instance status has been changed by other thread. discard this status change,{}", instanceId, instanceInfo); } - } private void stopInstance(Long instanceId, InstanceInfoDO instanceInfo) { diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceMetadataService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceMetadataService.java index 79a320e1..7ff0b223 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceMetadataService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceMetadataService.java @@ -1,5 +1,6 @@ package tech.powerjob.server.core.instance; +import lombok.RequiredArgsConstructor; import tech.powerjob.server.persistence.remote.model.InstanceInfoDO; import tech.powerjob.server.persistence.remote.model.JobInfoDO; import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository; @@ -21,14 +22,16 @@ import java.util.concurrent.ExecutionException; * @since 2020/6/23 */ @Service 
+@RequiredArgsConstructor public class InstanceMetadataService implements InitializingBean { - @Resource - private JobInfoRepository jobInfoRepository; - @Resource - private InstanceInfoRepository instanceInfoRepository; + private final JobInfoRepository jobInfoRepository; - // 缓存,一旦生成任务实例,其对应的 JobInfo 不应该再改变(即使源数据改变) + private final InstanceInfoRepository instanceInfoRepository; + + /** + * 缓存,一旦生成任务实例,其对应的 JobInfo 不应该再改变(即使源数据改变) + */ private Cache instanceId2JobInfoCache; @Value("${oms.instance.metadata.cache.size}") diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceService.java index bd506b5d..2d465b96 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/instance/InstanceService.java @@ -1,13 +1,14 @@ package tech.powerjob.server.core.instance; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.BeanUtils; import org.springframework.stereotype.Service; -import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.PowerQuery; import tech.powerjob.common.SystemInstanceResult; import tech.powerjob.common.enums.InstanceStatus; import tech.powerjob.common.enums.Protocol; +import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.model.InstanceDetail; import tech.powerjob.common.request.ServerQueryInstanceStatusReq; import tech.powerjob.common.request.ServerStopInstanceReq; @@ -28,7 +29,6 @@ import tech.powerjob.server.remote.server.redirector.DesignateServer; import tech.powerjob.server.remote.transport.TransportService; import tech.powerjob.server.remote.worker.WorkerClusterQueryService; -import javax.annotation.Resource; import java.util.Date; import 
java.util.List; import java.util.Optional; @@ -45,23 +45,22 @@ import static tech.powerjob.common.enums.InstanceStatus.STOPPED; */ @Slf4j @Service +@RequiredArgsConstructor public class InstanceService { - @Resource - private TransportService transportService; - @Resource - private DispatchService dispatchService; - @Resource - private IdGenerateService idGenerateService; - @Resource - private InstanceManager instanceManager; - @Resource - private JobInfoRepository jobInfoRepository; - @Resource - private InstanceInfoRepository instanceInfoRepository; + private final TransportService transportService; - @Resource - private WorkerClusterQueryService workerClusterQueryService; + private final DispatchService dispatchService; + + private final IdGenerateService idGenerateService; + + private final InstanceManager instanceManager; + + private final JobInfoRepository jobInfoRepository; + + private final InstanceInfoRepository instanceInfoRepository; + + private final WorkerClusterQueryService workerClusterQueryService; /** * 创建任务实例(注意,该方法并不调用 saveAndFlush,如果有需要立即同步到DB的需求,请在方法结束后手动调用 flush) @@ -78,7 +77,7 @@ public class InstanceService { * @param expectTriggerTime 预期执行时间 * @return 任务实例ID */ - public Long create(Long jobId, Long appId, String jobParams, String instanceParams, Long wfInstanceId, Long expectTriggerTime) { + public InstanceInfoDO create(Long jobId, Long appId, String jobParams, String instanceParams, Long wfInstanceId, Long expectTriggerTime) { Long instanceId = idGenerateService.allocate(); Date now = new Date(); @@ -100,7 +99,7 @@ public class InstanceService { newInstanceInfo.setGmtModified(now); instanceInfoRepository.save(newInstanceInfo); - return instanceId; + return newInstanceInfo; } /** @@ -181,7 +180,7 @@ public class InstanceService { // 派发任务 Long jobId = instanceInfo.getJobId(); JobInfoDO jobInfo = jobInfoRepository.findById(jobId).orElseThrow(() -> new PowerJobException("can't find job info by jobId: " + jobId)); - 
dispatchService.redispatch(jobInfo, instanceId); + dispatchService.dispatch(jobInfo, instanceId,Optional.of(instanceInfo),Optional.empty()); } /** diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/lock/UseCacheLockAspect.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/lock/UseCacheLockAspect.java index 11b4591c..1bf467cb 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/lock/UseCacheLockAspect.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/lock/UseCacheLockAspect.java @@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSON; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.collect.Maps; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.aspectj.lang.ProceedingJoinPoint; import org.aspectj.lang.annotation.Around; @@ -30,10 +31,10 @@ import java.util.concurrent.locks.ReentrantLock; @Aspect @Component @Order(1) +@RequiredArgsConstructor public class UseCacheLockAspect { - @Resource - private MonitorService monitorService; + private final MonitorService monitorService; private final Map> lockContainer = Maps.newConcurrentMap(); diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/CleanService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/CleanService.java index d9c9e137..06037106 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/CleanService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/CleanService.java @@ -1,15 +1,5 @@ package tech.powerjob.server.core.scheduler; -import tech.powerjob.common.enums.InstanceStatus; -import tech.powerjob.common.enums.WorkflowInstanceStatus; -import tech.powerjob.server.common.constants.PJThreadPool; 
-import tech.powerjob.server.common.utils.OmsFileUtils; -import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository; -import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository; -import tech.powerjob.server.persistence.mongodb.GridFsManager; -import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository; -import tech.powerjob.server.remote.worker.WorkerClusterManagerService; -import tech.powerjob.server.extension.LockService; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Stopwatch; import lombok.extern.slf4j.Slf4j; @@ -18,8 +8,17 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.scheduling.annotation.Async; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; +import tech.powerjob.common.enums.InstanceStatus; +import tech.powerjob.common.enums.WorkflowInstanceStatus; +import tech.powerjob.server.common.constants.PJThreadPool; +import tech.powerjob.server.common.utils.OmsFileUtils; +import tech.powerjob.server.extension.LockService; +import tech.powerjob.server.persistence.mongodb.GridFsManager; +import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository; +import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository; +import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository; +import tech.powerjob.server.remote.worker.WorkerClusterManagerService; -import javax.annotation.Resource; import java.io.File; import java.util.Date; @@ -33,25 +32,21 @@ import java.util.Date; @Service public class CleanService { - @Resource - private GridFsManager gridFsManager; - @Resource - private InstanceInfoRepository instanceInfoRepository; - @Resource - private WorkflowInstanceInfoRepository workflowInstanceInfoRepository; - @Resource - private WorkflowNodeInfoRepository workflowNodeInfoRepository; - 
@Resource - private LockService lockService; + private final GridFsManager gridFsManager; - @Value("${oms.instanceinfo.retention}") - private int instanceInfoRetentionDay; + private final InstanceInfoRepository instanceInfoRepository; - @Value("${oms.container.retention.local}") - private int localContainerRetentionDay; - @Value("${oms.container.retention.remote}") - private int remoteContainerRetentionDay; + private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository; + private final WorkflowNodeInfoRepository workflowNodeInfoRepository; + + private final LockService lockService; + + private final int instanceInfoRetentionDay; + + private final int localContainerRetentionDay; + + private final int remoteContainerRetentionDay; private static final int TEMPORARY_RETENTION_DAY = 3; @@ -62,6 +57,21 @@ public class CleanService { private static final String HISTORY_DELETE_LOCK = "history_delete_lock"; + public CleanService(GridFsManager gridFsManager, InstanceInfoRepository instanceInfoRepository, WorkflowInstanceInfoRepository workflowInstanceInfoRepository, + WorkflowNodeInfoRepository workflowNodeInfoRepository, LockService lockService, + @Value("${oms.instanceinfo.retention}") int instanceInfoRetentionDay, + @Value("${oms.container.retention.local}") int localContainerRetentionDay, + @Value("${oms.container.retention.remote}") int remoteContainerRetentionDay) { + this.gridFsManager = gridFsManager; + this.instanceInfoRepository = instanceInfoRepository; + this.workflowInstanceInfoRepository = workflowInstanceInfoRepository; + this.workflowNodeInfoRepository = workflowNodeInfoRepository; + this.lockService = lockService; + this.instanceInfoRetentionDay = instanceInfoRetentionDay; + this.localContainerRetentionDay = localContainerRetentionDay; + this.remoteContainerRetentionDay = remoteContainerRetentionDay; + } + @Async(PJThreadPool.TIMING_POOL) @Scheduled(cron = CLEAN_TIME_EXPRESSION) @@ -84,7 +94,7 @@ public class CleanService { */ private void 
cleanByOneServer() { // 只要第一个server抢到锁其他server就会返回,所以锁10分钟应该足够了 - boolean lock = lockService.tryLock(HISTORY_DELETE_LOCK, 10 * 60 * 1000); + boolean lock = lockService.tryLock(HISTORY_DELETE_LOCK, 10 * 60 * 1000L); if (!lock) { log.info("[CleanService] clean job is already running, just return."); return; diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/CoreScheduleTaskManager.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/CoreScheduleTaskManager.java new file mode 100644 index 00000000..c36399fe --- /dev/null +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/CoreScheduleTaskManager.java @@ -0,0 +1,80 @@ +package tech.powerjob.server.core.scheduler; + +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.DisposableBean; +import org.springframework.beans.factory.InitializingBean; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; + +/** + * @author Echo009 + * @since 2022/10/12 + */ +@Service +@Slf4j +@RequiredArgsConstructor +public class CoreScheduleTaskManager implements InitializingBean, DisposableBean { + + + private final PowerScheduleService powerScheduleService; + + private final InstanceStatusCheckService instanceStatusCheckService; + + private final List coreThreadContainer = new ArrayList<>(); + + + @SuppressWarnings("AlibabaAvoidManuallyCreateThread") + @Override + public void afterPropertiesSet() { + // 定时调度 + coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleCronJob", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::scheduleCronJob), "Thread-ScheduleCronJob")); + coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleCronWorkflow", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::scheduleCronWorkflow), "Thread-ScheduleCronWorkflow")); + coreThreadContainer.add(new 
Thread(new LoopRunnable("ScheduleFrequentJob", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::scheduleFrequentJob), "Thread-ScheduleFrequentJob")); + // 数据清理 + coreThreadContainer.add(new Thread(new LoopRunnable("CleanWorkerData", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::cleanData), "Thread-CleanWorkerData")); + // 状态检查 + coreThreadContainer.add(new Thread(new LoopRunnable("CheckRunningInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkRunningInstance), "Thread-CheckRunningInstance")); + coreThreadContainer.add(new Thread(new LoopRunnable("CheckWaitingDispatchInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkWaitingDispatchInstance), "Thread-CheckWaitingDispatchInstance")); + coreThreadContainer.add(new Thread(new LoopRunnable("CheckWaitingWorkerReceiveInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkWaitingWorkerReceiveInstance), "Thread-CheckWaitingWorkerReceiveInstance")); + coreThreadContainer.add(new Thread(new LoopRunnable("CheckWorkflowInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkWorkflowInstance), "Thread-CheckWorkflowInstance")); + + coreThreadContainer.forEach(Thread::start); + } + + @Override + public void destroy() { + coreThreadContainer.forEach(Thread::interrupt); + } + + + @RequiredArgsConstructor + private static class LoopRunnable implements Runnable { + + private final String taskName; + + private final Long runningInterval; + + private final Runnable innerRunnable; + + @SuppressWarnings("BusyWait") + @Override + public void run() { + log.info("start task : {}.", taskName); + while (true) { + try { + innerRunnable.run(); + Thread.sleep(runningInterval); + } catch (InterruptedException e) { + log.warn("[{}] task has been interrupted!", taskName, e); + break; + } catch (Exception e) { + log.error("[{}] task failed!", taskName, e); + } + } + } + } + +} diff --git 
a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/InstanceStatusCheckService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/InstanceStatusCheckService.java index b324923e..879cd749 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/InstanceStatusCheckService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/InstanceStatusCheckService.java @@ -1,29 +1,31 @@ package tech.powerjob.server.core.scheduler; -import tech.powerjob.common.enums.InstanceStatus; +import com.google.common.base.Stopwatch; +import com.google.common.collect.Lists; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.data.domain.PageRequest; +import org.springframework.stereotype.Service; +import org.springframework.util.CollectionUtils; import tech.powerjob.common.SystemInstanceResult; +import tech.powerjob.common.enums.InstanceStatus; import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.enums.WorkflowInstanceStatus; -import tech.powerjob.server.common.constants.PJThreadPool; +import tech.powerjob.server.common.Holder; import tech.powerjob.server.common.constants.SwitchableStatus; -import tech.powerjob.server.remote.transport.starter.AkkaStarter; -import tech.powerjob.server.persistence.remote.model.*; -import tech.powerjob.server.persistence.remote.repository.*; import tech.powerjob.server.core.DispatchService; import tech.powerjob.server.core.instance.InstanceManager; import tech.powerjob.server.core.workflow.WorkflowInstanceManager; -import com.google.common.base.Stopwatch; -import com.google.common.collect.Lists; -import lombok.extern.slf4j.Slf4j; -import org.springframework.scheduling.annotation.Async; -import org.springframework.scheduling.annotation.Scheduled; -import org.springframework.stereotype.Service; -import 
org.springframework.util.CollectionUtils; +import tech.powerjob.server.persistence.remote.model.InstanceInfoDO; +import tech.powerjob.server.persistence.remote.model.JobInfoDO; +import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO; +import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO; +import tech.powerjob.server.persistence.remote.model.brief.BriefInstanceInfo; +import tech.powerjob.server.persistence.remote.repository.*; +import tech.powerjob.server.remote.transport.starter.AkkaStarter; import javax.annotation.Resource; -import java.util.Date; -import java.util.List; -import java.util.Optional; +import java.util.*; import java.util.stream.Collectors; /** @@ -34,135 +36,220 @@ import java.util.stream.Collectors; */ @Slf4j @Service +@RequiredArgsConstructor public class InstanceStatusCheckService { - private static final int MAX_BATCH_NUM = 10; + private static final int MAX_BATCH_NUM_APP = 10; + private static final int MAX_BATCH_NUM_INSTANCE = 3000; + private static final int MAX_BATCH_UPDATE_NUM = 500; private static final long DISPATCH_TIMEOUT_MS = 30000; private static final long RECEIVE_TIMEOUT_MS = 60000; private static final long RUNNING_TIMEOUT_MS = 60000; private static final long WORKFLOW_WAITING_TIMEOUT_MS = 60000; - @Resource - private DispatchService dispatchService; - @Resource - private InstanceManager instanceManager; - @Resource - private WorkflowInstanceManager workflowInstanceManager; + public static final long CHECK_INTERVAL = 10000; - @Resource - private AppInfoRepository appInfoRepository; - @Resource - private JobInfoRepository jobInfoRepository; - @Resource - private InstanceInfoRepository instanceInfoRepository; - @Resource - private WorkflowInfoRepository workflowInfoRepository; - @Resource - private WorkflowInstanceInfoRepository workflowInstanceInfoRepository; + private final DispatchService dispatchService; - @Async(PJThreadPool.TIMING_POOL) - @Scheduled(fixedDelay = 10000) - public void 
timingStatusCheck() { + private final InstanceManager instanceManager; + + private final WorkflowInstanceManager workflowInstanceManager; + + private final AppInfoRepository appInfoRepository; + + private final JobInfoRepository jobInfoRepository; + + private final InstanceInfoRepository instanceInfoRepository; + + + private final WorkflowInfoRepository workflowInfoRepository; + + private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository; + + public void checkWorkflowInstance() { Stopwatch stopwatch = Stopwatch.createStarted(); - - // 查询DB获取该Server需要负责的AppGroup - List appInfoList = appInfoRepository.findAllByCurrentServer(AkkaStarter.getActorSystemAddress()); - if (CollectionUtils.isEmpty(appInfoList)) { + // 查询 DB 获取该 Server 需要负责的 AppGroup + List allAppIds = appInfoRepository.listAppIdByCurrentServer(AkkaStarter.getActorSystemAddress()); + if (CollectionUtils.isEmpty(allAppIds)) { log.info("[InstanceStatusChecker] current server has no app's job to check"); return; } - List allAppIds = appInfoList.stream().map(AppInfoDO::getId).collect(Collectors.toList()); - try { - checkInstance(allAppIds); checkWorkflowInstance(allAppIds); } catch (Exception e) { - log.error("[InstanceStatusChecker] status check failed.", e); + log.error("[InstanceStatusChecker] WorkflowInstance status check failed.", e); } - log.info("[InstanceStatusChecker] status check using {}.", stopwatch.stop()); + log.info("[InstanceStatusChecker] WorkflowInstance status check using {}.", stopwatch.stop()); } /** - * 检查任务实例的状态,发现异常及时重试,包括 + * 检查等待派发的实例 * WAITING_DISPATCH 超时:写入时间轮但未调度前 server down - * WAITING_WORKER_RECEIVE 超时:由于网络错误导致 worker 未接受成功 - * RUNNING 超时:TaskTracker down,断开与 server 的心跳连接 - * - * @param allAppIds 本系统所承担的所有 appIds */ - private void checkInstance(List allAppIds) { - - Lists.partition(allAppIds, MAX_BATCH_NUM).forEach(partAppIds -> { - // 1. 检查等待 WAITING_DISPATCH 状态的任务 - handleWaitingDispatchInstance(partAppIds); - // 2. 
检查 WAITING_WORKER_RECEIVE 状态的任务 - handleWaitingWorkerReceiveInstance(partAppIds); - // 3. 检查 RUNNING 状态的任务(一定时间内没收到 TaskTracker 的状态报告,视为失败) - handleRunningInstance(partAppIds); - }); + public void checkWaitingDispatchInstance() { + Stopwatch stopwatch = Stopwatch.createStarted(); + // 查询 DB 获取该 Server 需要负责的 AppGroup + List allAppIds = appInfoRepository.listAppIdByCurrentServer(AkkaStarter.getActorSystemAddress()); + if (CollectionUtils.isEmpty(allAppIds)) { + log.info("[InstanceStatusChecker] current server has no app's job to check"); + return; + } + try { + // 检查等待 WAITING_DISPATCH 状态的任务 + Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(this::handleWaitingDispatchInstance); + } catch (Exception e) { + log.error("[InstanceStatusChecker] WaitingDispatchInstance status check failed.", e); + } + log.info("[InstanceStatusChecker] WaitingDispatchInstance status check using {}.", stopwatch.stop()); } + /** + * 检查等待 worker 接收的实例 + * WAITING_WORKER_RECEIVE 超时:由于网络错误导致 worker 未接受成功 + */ + public void checkWaitingWorkerReceiveInstance() { + Stopwatch stopwatch = Stopwatch.createStarted(); + // 查询 DB 获取该 Server 需要负责的 AppGroup + List allAppIds = appInfoRepository.listAppIdByCurrentServer(AkkaStarter.getActorSystemAddress()); + if (CollectionUtils.isEmpty(allAppIds)) { + log.info("[InstanceStatusChecker] current server has no app's job to check"); + return; + } + try { + // 检查 WAITING_WORKER_RECEIVE 状态的任务 + Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(this::handleWaitingWorkerReceiveInstance); + } catch (Exception e) { + log.error("[InstanceStatusChecker] WaitingWorkerReceiveInstance status check failed.", e); + } + log.info("[InstanceStatusChecker] WaitingWorkerReceiveInstance status check using {}.", stopwatch.stop()); + } + + /** + * 检查运行中的实例 + * RUNNING 超时:TaskTracker down,断开与 server 的心跳连接 + */ + public void checkRunningInstance() { + Stopwatch stopwatch = Stopwatch.createStarted(); + // 查询 DB 获取该 Server 需要负责的 AppGroup + List allAppIds = 
appInfoRepository.listAppIdByCurrentServer(AkkaStarter.getActorSystemAddress()); + if (CollectionUtils.isEmpty(allAppIds)) { + log.info("[InstanceStatusChecker] current server has no app's job to check"); + return; + } + try { + // 检查 RUNNING 状态的任务(一定时间没收到 TaskTracker 的状态报告,视为失败) + Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(this::handleRunningInstance); + } catch (Exception e) { + log.error("[InstanceStatusChecker] RunningInstance status check failed.", e); + } + log.info("[InstanceStatusChecker] RunningInstance status check using {}.", stopwatch.stop()); + } private void handleWaitingDispatchInstance(List partAppIds) { // 1. 检查等待 WAITING_DISPATCH 状态的任务 long threshold = System.currentTimeMillis() - DISPATCH_TIMEOUT_MS; - List waitingDispatchInstances = instanceInfoRepository.findByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_DISPATCH.getV(), threshold); - if (!CollectionUtils.isEmpty(waitingDispatchInstances)) { - log.warn("[InstanceStatusChecker] find some instance which is not triggered as expected: {}", waitingDispatchInstances); - waitingDispatchInstances.forEach(instance -> { - - Optional jobInfoOpt = jobInfoRepository.findById(instance.getJobId()); - if (jobInfoOpt.isPresent()) { - dispatchService.redispatch(jobInfoOpt.get(), instance.getInstanceId()); - } else { - log.warn("[InstanceStatusChecker] can't find job by jobId[{}], so redispatch failed, failed instance: {}", instance.getJobId(), instance); - updateFailedInstance(instance, SystemInstanceResult.CAN_NOT_FIND_JOB_INFO); + List waitingDispatchInstances = instanceInfoRepository.findAllByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_DISPATCH.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE)); + while (!waitingDispatchInstances.isEmpty()) { + List overloadAppIdList = new ArrayList<>(); + long startTime = System.currentTimeMillis(); + // 按照 appId 分组处理,方便处理超载的逻辑 + Map> waitingDispatchInstancesMap = 
waitingDispatchInstances.stream().collect(Collectors.groupingBy(InstanceInfoDO::getAppId)); + for (Map.Entry> entry : waitingDispatchInstancesMap.entrySet()) { + final Long currentAppId = entry.getKey(); + final List currentAppWaitingDispatchInstances = entry.getValue(); + // collect job id + Set jobIds = currentAppWaitingDispatchInstances.stream().map(InstanceInfoDO::getJobId).collect(Collectors.toSet()); + // query job info and map + Map jobInfoMap = jobInfoRepository.findByIdIn(jobIds).stream().collect(Collectors.toMap(JobInfoDO::getId, e -> e)); + log.warn("[InstanceStatusChecker] find some instance in app({}) which is not triggered as expected: {}", currentAppId, currentAppWaitingDispatchInstances.stream().map(InstanceInfoDO::getInstanceId).collect(Collectors.toList())); + final Holder overloadFlag = new Holder<>(false); + // 先这么简单处理没问题,毕竟只有这一个地方用了 parallelStream + currentAppWaitingDispatchInstances.parallelStream().forEach(instance -> { + if (overloadFlag.get()) { + // 直接忽略 + return; + } + Optional jobInfoOpt = Optional.ofNullable(jobInfoMap.get(instance.getJobId())); + if (jobInfoOpt.isPresent()) { + // 处理等待派发的任务没有必要再重置一次状态,减少 io 次数 + dispatchService.dispatch(jobInfoOpt.get(), instance.getInstanceId(), Optional.of(instance), Optional.of(overloadFlag)); + } else { + log.warn("[InstanceStatusChecker] can't find job by jobId[{}], so redispatch failed, failed instance: {}", instance.getJobId(), instance); + final Optional opt = instanceInfoRepository.findById(instance.getId()); + opt.ifPresent(instanceInfoDO -> updateFailedInstance(instanceInfoDO, SystemInstanceResult.CAN_NOT_FIND_JOB_INFO)); + } + }); + threshold = System.currentTimeMillis() - DISPATCH_TIMEOUT_MS; + if (overloadFlag.get()) { + overloadAppIdList.add(currentAppId); } - }); + } + log.info("[InstanceStatusChecker] process {} task,use {} ms", waitingDispatchInstances.size(), System.currentTimeMillis() - startTime); + if (!overloadAppIdList.isEmpty()) { + log.warn("[InstanceStatusChecker] app[{}] is 
overload, so skip check waiting dispatch instance", overloadAppIdList); + partAppIds.removeAll(overloadAppIdList); + } + if (partAppIds.isEmpty()) { + break; + } + waitingDispatchInstances = instanceInfoRepository.findAllByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_DISPATCH.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE)); } + } private void handleWaitingWorkerReceiveInstance(List partAppIds) { // 2. 检查 WAITING_WORKER_RECEIVE 状态的任务 long threshold = System.currentTimeMillis() - RECEIVE_TIMEOUT_MS; - List waitingWorkerReceiveInstances = instanceInfoRepository.findByAppIdInAndStatusAndActualTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_WORKER_RECEIVE.getV(), threshold); - if (!CollectionUtils.isEmpty(waitingWorkerReceiveInstances)) { - log.warn("[InstanceStatusChecker] find one instance didn't receive any reply from worker, try to redispatch: {}", waitingWorkerReceiveInstances); - waitingWorkerReceiveInstances.forEach(instance -> { - // 重新派发 - JobInfoDO jobInfoDO = jobInfoRepository.findById(instance.getJobId()).orElseGet(JobInfoDO::new); - dispatchService.redispatch(jobInfoDO, instance.getInstanceId()); - }); + List waitingWorkerReceiveInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndActualTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_WORKER_RECEIVE.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE)); + while (!waitingWorkerReceiveInstances.isEmpty()) { + log.warn("[InstanceStatusChecker] find some instance didn't receive any reply from worker, try to redispatch: {}", waitingWorkerReceiveInstances.stream().map(BriefInstanceInfo::getInstanceId).collect(Collectors.toList())); + final List> partitions = Lists.partition(waitingWorkerReceiveInstances, MAX_BATCH_UPDATE_NUM); + for (List partition : partitions) { + dispatchService.redispatchBatchAsyncLockFree(partition.stream().map(BriefInstanceInfo::getInstanceId).collect(Collectors.toList()), 
InstanceStatus.WAITING_WORKER_RECEIVE.getV()); + } + // 重新查询 + threshold = System.currentTimeMillis() - RECEIVE_TIMEOUT_MS; + waitingWorkerReceiveInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndActualTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_WORKER_RECEIVE.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE)); } } private void handleRunningInstance(List partAppIds) { // 3. 检查 RUNNING 状态的任务(一定时间没收到 TaskTracker 的状态报告,视为失败) long threshold = System.currentTimeMillis() - RUNNING_TIMEOUT_MS; - List failedInstances = instanceInfoRepository.findByAppIdInAndStatusAndGmtModifiedBefore(partAppIds, InstanceStatus.RUNNING.getV(), new Date(threshold)); - if (!CollectionUtils.isEmpty(failedInstances)) { - log.warn("[InstanceStatusCheckService] instances({}) has not received status report for a long time.", failedInstances); + List failedInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndGmtModifiedBefore(partAppIds, InstanceStatus.RUNNING.getV(), new Date(threshold), PageRequest.of(0, MAX_BATCH_NUM_INSTANCE)); + while (!failedInstances.isEmpty()) { + // collect job id + Set jobIds = failedInstances.stream().map(BriefInstanceInfo::getJobId).collect(Collectors.toSet()); + // query job info and map + Map jobInfoMap = jobInfoRepository.findByIdIn(jobIds).stream().collect(Collectors.toMap(JobInfoDO::getId, e -> e)); + log.warn("[InstanceStatusCheckService] find some instances have not received status report for a long time : {}", failedInstances.stream().map(BriefInstanceInfo::getInstanceId).collect(Collectors.toList())); failedInstances.forEach(instance -> { - - JobInfoDO jobInfoDO = jobInfoRepository.findById(instance.getJobId()).orElseGet(JobInfoDO::new); - TimeExpressionType timeExpressionType = TimeExpressionType.of(jobInfoDO.getTimeExpressionType()); - SwitchableStatus switchableStatus = SwitchableStatus.of(jobInfoDO.getStatus()); - - // 如果任务已关闭,则不进行重试,将任务置为失败即可;秒级任务也直接置为失败,由派发器重新调度 - if (switchableStatus != 
SwitchableStatus.ENABLE || TimeExpressionType.FREQUENT_TYPES.contains(timeExpressionType.getV())) { - updateFailedInstance(instance, SystemInstanceResult.REPORT_TIMEOUT); + Optional jobInfoOpt = Optional.ofNullable(jobInfoMap.get(instance.getJobId())); + if (!jobInfoOpt.isPresent()) { + final Optional opt = instanceInfoRepository.findById(instance.getId()); + opt.ifPresent(e -> updateFailedInstance(e, SystemInstanceResult.REPORT_TIMEOUT)); return; } - - // CRON 和 API一样,失败次数 + 1,根据重试配置进行重试 - if (instance.getRunningTimes() < jobInfoDO.getInstanceRetryNum()) { - dispatchService.redispatch(jobInfoDO, instance.getInstanceId()); - } else { - updateFailedInstance(instance, SystemInstanceResult.REPORT_TIMEOUT); + TimeExpressionType timeExpressionType = TimeExpressionType.of(jobInfoOpt.get().getTimeExpressionType()); + SwitchableStatus switchableStatus = SwitchableStatus.of(jobInfoOpt.get().getStatus()); + // 如果任务已关闭,则不进行重试,将任务置为失败即可;秒级任务也直接置为失败,由派发器重新调度 + if (switchableStatus != SwitchableStatus.ENABLE || TimeExpressionType.FREQUENT_TYPES.contains(timeExpressionType.getV())) { + final Optional opt = instanceInfoRepository.findById(instance.getId()); + opt.ifPresent(e -> updateFailedInstance(e, SystemInstanceResult.REPORT_TIMEOUT)); + return; + } + // CRON 和 API一样,失败次数 + 1,根据重试配置进行重试 + if (instance.getRunningTimes() < jobInfoOpt.get().getInstanceRetryNum()) { + dispatchService.redispatchAsync(instance.getInstanceId(), InstanceStatus.RUNNING.getV()); + } else { + final Optional opt = instanceInfoRepository.findById(instance.getId()); + opt.ifPresent(e -> updateFailedInstance(e, SystemInstanceResult.REPORT_TIMEOUT)); } - }); + threshold = System.currentTimeMillis() - RUNNING_TIMEOUT_MS; + failedInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndGmtModifiedBefore(partAppIds, InstanceStatus.RUNNING.getV(), new Date(threshold), PageRequest.of(0, MAX_BATCH_NUM_INSTANCE)); } + } /** @@ -175,7 +262,7 @@ public class InstanceStatusCheckService { // 重试长时间处于 
WAITING 状态的工作流实例 long threshold = System.currentTimeMillis() - WORKFLOW_WAITING_TIMEOUT_MS; - Lists.partition(allAppIds, MAX_BATCH_NUM).forEach(partAppIds -> { + Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(partAppIds -> { List waitingWfInstanceList = workflowInstanceInfoRepository.findByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, WorkflowInstanceStatus.WAITING.getV(), threshold); if (!CollectionUtils.isEmpty(waitingWfInstanceList)) { diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/PowerScheduleService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/PowerScheduleService.java index 862acf8e..33bf97f3 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/PowerScheduleService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/scheduler/PowerScheduleService.java @@ -1,38 +1,33 @@ package tech.powerjob.server.core.scheduler; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.BeanUtils; +import org.springframework.stereotype.Service; +import org.springframework.util.CollectionUtils; import tech.powerjob.common.enums.InstanceStatus; import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.model.LifeCycle; -import tech.powerjob.server.common.constants.PJThreadPool; -import tech.powerjob.server.remote.transport.starter.AkkaStarter; import tech.powerjob.server.common.constants.SwitchableStatus; -import tech.powerjob.server.persistence.remote.model.AppInfoDO; +import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService; +import tech.powerjob.server.core.DispatchService; +import tech.powerjob.server.core.instance.InstanceService; +import 
tech.powerjob.server.core.service.JobService; +import tech.powerjob.server.core.workflow.WorkflowInstanceManager; import tech.powerjob.server.persistence.remote.model.JobInfoDO; import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO; import tech.powerjob.server.persistence.remote.repository.AppInfoRepository; import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository; import tech.powerjob.server.persistence.remote.repository.JobInfoRepository; import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository; -import tech.powerjob.server.core.DispatchService; -import tech.powerjob.server.core.service.JobService; +import tech.powerjob.server.remote.transport.starter.AkkaStarter; import tech.powerjob.server.remote.worker.WorkerClusterManagerService; -import tech.powerjob.server.core.instance.InstanceService; -import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService; -import tech.powerjob.server.core.workflow.WorkflowInstanceManager; -import com.google.common.base.Stopwatch; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.BeanUtils; -import org.springframework.scheduling.annotation.Async; -import org.springframework.scheduling.annotation.Scheduled; -import org.springframework.stereotype.Service; -import org.springframework.util.CollectionUtils; import javax.annotation.Resource; import java.util.*; -import java.util.stream.Collectors; /** * 任务调度执行服务(调度 CRON 表达式的任务进行执行) @@ -44,6 +39,7 @@ import java.util.stream.Collectors; */ @Slf4j @Service +@RequiredArgsConstructor public class PowerScheduleService { /** @@ -51,83 +47,105 @@ public class PowerScheduleService { */ private static final int MAX_APP_NUM = 10; - @Resource - private DispatchService dispatchService; - @Resource - private InstanceService instanceService; - @Resource - private WorkflowInstanceManager 
workflowInstanceManager; + private final DispatchService dispatchService; - @Resource - private AppInfoRepository appInfoRepository; - @Resource - private JobInfoRepository jobInfoRepository; - @Resource - private WorkflowInfoRepository workflowInfoRepository; - @Resource - private InstanceInfoRepository instanceInfoRepository; + private final InstanceService instanceService; - @Resource - private JobService jobService; - @Resource - private TimingStrategyService timingStrategyService; + private final WorkflowInstanceManager workflowInstanceManager; - private static final long SCHEDULE_RATE = 15000; + private final AppInfoRepository appInfoRepository; - @Async(PJThreadPool.TIMING_POOL) - @Scheduled(fixedDelay = SCHEDULE_RATE) - public void timingSchedule() { + private final JobInfoRepository jobInfoRepository; + private final WorkflowInfoRepository workflowInfoRepository; + + private final InstanceInfoRepository instanceInfoRepository; + + private final JobService jobService; + + private final TimingStrategyService timingStrategyService; + + public static final long SCHEDULE_RATE = 15000; + + + public void scheduleCronJob() { long start = System.currentTimeMillis(); - Stopwatch stopwatch = Stopwatch.createStarted(); - - // 先查询DB,查看本机需要负责的任务 - List allAppInfos = appInfoRepository.findAllByCurrentServer(AkkaStarter.getActorSystemAddress()); - if (CollectionUtils.isEmpty(allAppInfos)) { - log.info("[JobScheduleService] current server has no app's job to schedule."); - return; - } - List allAppIds = allAppInfos.stream().map(AppInfoDO::getId).collect(Collectors.toList()); - // 清理不需要维护的数据 - WorkerClusterManagerService.clean(allAppIds); - // 调度 CRON 表达式 JOB try { - scheduleCronJob(allAppIds); + final List allAppIds = appInfoRepository.listAppIdByCurrentServer(AkkaStarter.getActorSystemAddress()); + if (CollectionUtils.isEmpty(allAppIds)) { + log.info("[CronJobSchedule] current server has no app's job to schedule."); + return; + } + scheduleCronJobCore(allAppIds); } catch 
(Exception e) { - log.error("[CronScheduler] schedule cron job failed.", e); + log.error("[CronJobSchedule] schedule cron job failed.", e); } - String cronTime = stopwatch.toString(); - stopwatch.reset().start(); - - // 调度 workflow 任务 - try { - scheduleWorkflow(allAppIds); - } catch (Exception e) { - log.error("[WorkflowScheduler] schedule workflow job failed.", e); - } - String wfTime = stopwatch.toString(); - stopwatch.reset().start(); - - // 调度 秒级任务 - try { - scheduleFrequentJob(allAppIds); - } catch (Exception e) { - log.error("[FrequentScheduler] schedule frequent job failed.", e); - } - - log.info("[JobScheduleService] cron schedule: {}, workflow schedule: {}, frequent schedule: {}.", cronTime, wfTime, stopwatch.stop()); - long cost = System.currentTimeMillis() - start; + log.info("[CronJobSchedule] cron job schedule use {} ms.", cost); if (cost > SCHEDULE_RATE) { - log.warn("[JobScheduleService] The database query is using too much time({}ms), please check if the database load is too high!", cost); + log.warn("[CronJobSchedule] The database query is using too much time({}ms), please check if the database load is too high!", cost); + } + } + + public void scheduleCronWorkflow() { + long start = System.currentTimeMillis(); + // 调度 CRON 表达式 WORKFLOW + try { + final List allAppIds = appInfoRepository.listAppIdByCurrentServer(AkkaStarter.getActorSystemAddress()); + if (CollectionUtils.isEmpty(allAppIds)) { + log.info("[CronWorkflowSchedule] current server has no app's workflow to schedule."); + return; + } + scheduleWorkflowCore(allAppIds); + } catch (Exception e) { + log.error("[CronWorkflowSchedule] schedule cron workflow failed.", e); + } + long cost = System.currentTimeMillis() - start; + log.info("[CronWorkflowSchedule] cron workflow schedule use {} ms.", cost); + if (cost > SCHEDULE_RATE) { + log.warn("[CronWorkflowSchedule] The database query is using too much time({}ms), please check if the database load is too high!", cost); + } + } + + + public void 
scheduleFrequentJob() { + long start = System.currentTimeMillis(); + // 调度 FIX_RATE/FIX_DELAY 表达式 JOB + try { + final List allAppIds = appInfoRepository.listAppIdByCurrentServer(AkkaStarter.getActorSystemAddress()); + if (CollectionUtils.isEmpty(allAppIds)) { + log.info("[FrequentJobSchedule] current server has no app's job to schedule."); + return; + } + scheduleFrequentJobCore(allAppIds); + } catch (Exception e) { + log.error("[FrequentJobSchedule] schedule frequent job failed.", e); + } + long cost = System.currentTimeMillis() - start; + log.info("[FrequentJobSchedule] frequent job schedule use {} ms.", cost); + if (cost > SCHEDULE_RATE) { + log.warn("[FrequentJobSchedule] The database query is using too much time({}ms), please check if the database load is too high!", cost); + } + } + + + public void cleanData() { + try { + final List allAppIds = appInfoRepository.listAppIdByCurrentServer(AkkaStarter.getActorSystemAddress()); + if (allAppIds.isEmpty()) { + return; + } + WorkerClusterManagerService.clean(allAppIds); + } catch (Exception e) { + log.error("[CleanData] clean data failed.", e); } } /** * 调度 CRON 表达式类型的任务 */ - private void scheduleCronJob(List appIds) { + private void scheduleCronJobCore(List appIds) { long nowTime = System.currentTimeMillis(); long timeThreshold = nowTime + 2 * SCHEDULE_RATE; @@ -147,7 +165,7 @@ public class PowerScheduleService { log.info("[CronScheduler] These cron jobs will be scheduled: {}.", jobInfos); jobInfos.forEach(jobInfo -> { - Long instanceId = instanceService.create(jobInfo.getId(), jobInfo.getAppId(), jobInfo.getJobParams(), null, null, jobInfo.getNextTriggerTime()); + Long instanceId = instanceService.create(jobInfo.getId(), jobInfo.getAppId(), jobInfo.getJobParams(), null, null, jobInfo.getNextTriggerTime()).getInstanceId(); jobId2InstanceId.put(jobInfo.getId(), instanceId); }); instanceInfoRepository.flush(); @@ -165,7 +183,7 @@ public class PowerScheduleService { delay = targetTriggerTime - nowTime; } - 
InstanceTimeWheelService.schedule(instanceId, delay, () -> dispatchService.dispatch(jobInfoDO, instanceId)); + InstanceTimeWheelService.schedule(instanceId, delay, () -> dispatchService.dispatch(jobInfoDO, instanceId, Optional.empty(), Optional.empty())); }); // 3. 计算下一次调度时间(忽略5S内的重复执行,即CRON模式下最小的连续执行间隔为 SCHEDULE_RATE ms) @@ -185,7 +203,7 @@ public class PowerScheduleService { }); } - private void scheduleWorkflow(List appIds) { + private void scheduleWorkflowCore(List appIds) { long nowTime = System.currentTimeMillis(); long timeThreshold = nowTime + 2 * SCHEDULE_RATE; @@ -220,7 +238,7 @@ public class PowerScheduleService { }); } - private void scheduleFrequentJob(List appIds) { + private void scheduleFrequentJobCore(List appIds) { Lists.partition(appIds, MAX_APP_NUM).forEach(partAppIds -> { try { diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/AppInfoService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/AppInfoService.java index 66863750..e768bd77 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/AppInfoService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/AppInfoService.java @@ -1,5 +1,6 @@ package tech.powerjob.server.core.service; +import lombok.RequiredArgsConstructor; import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.server.persistence.remote.model.AppInfoDO; import tech.powerjob.server.persistence.remote.repository.AppInfoRepository; @@ -15,10 +16,10 @@ import java.util.Objects; * @since 2020/6/20 */ @Service +@RequiredArgsConstructor public class AppInfoService { - @Resource - private AppInfoRepository appInfoRepository; + private final AppInfoRepository appInfoRepository; /** * 验证应用访问权限 diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/CacheService.java 
b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/CacheService.java index 619e5356..491ea328 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/CacheService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/CacheService.java @@ -1,17 +1,16 @@ package tech.powerjob.server.core.service; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; import tech.powerjob.server.persistence.remote.model.InstanceInfoDO; import tech.powerjob.server.persistence.remote.model.JobInfoDO; import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO; import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository; import tech.powerjob.server.persistence.remote.repository.JobInfoRepository; import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import lombok.extern.slf4j.Slf4j; -import org.springframework.stereotype.Service; -import javax.annotation.Resource; import java.time.Duration; import java.util.Optional; @@ -25,19 +24,23 @@ import java.util.Optional; @Service public class CacheService { - @Resource - private JobInfoRepository jobInfoRepository; - @Resource - private WorkflowInfoRepository workflowInfoRepository; - @Resource - private InstanceInfoRepository instanceInfoRepository; + private final JobInfoRepository jobInfoRepository; + + private final WorkflowInfoRepository workflowInfoRepository; + + private final InstanceInfoRepository instanceInfoRepository; private final Cache jobId2JobNameCache; private final Cache workflowId2WorkflowNameCache; private final Cache instanceId2AppId; private final Cache jobId2AppId; - public CacheService() { + public CacheService(JobInfoRepository jobInfoRepository, 
WorkflowInfoRepository workflowInfoRepository, InstanceInfoRepository instanceInfoRepository) { + + this.jobInfoRepository = jobInfoRepository; + this.workflowInfoRepository = workflowInfoRepository; + this.instanceInfoRepository = instanceInfoRepository; + jobId2JobNameCache = CacheBuilder.newBuilder() .expireAfterWrite(Duration.ofMinutes(1)) .maximumSize(512) diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/JobService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/JobService.java index b3fe7703..dc58de93 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/JobService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/JobService.java @@ -2,6 +2,7 @@ package tech.powerjob.server.core.service; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.BeanUtils; import org.springframework.data.jpa.domain.Specification; @@ -43,20 +44,18 @@ import java.util.stream.Collectors; */ @Slf4j @Service +@RequiredArgsConstructor public class JobService { - @Resource - private InstanceService instanceService; + private final InstanceService instanceService; - @Resource - private DispatchService dispatchService; - @Resource - private JobInfoRepository jobInfoRepository; - @Resource - private InstanceInfoRepository instanceInfoRepository; - @Resource - private TimingStrategyService timingStrategyService; + private final DispatchService dispatchService; + private final JobInfoRepository jobInfoRepository; + + private final InstanceInfoRepository instanceInfoRepository; + + private final TimingStrategyService timingStrategyService; /** * 保存/修改任务 @@ -173,15 +172,15 @@ public class JobService { JobInfoDO jobInfo = jobInfoRepository.findById(jobId).orElseThrow(() -> new 
IllegalArgumentException("can't find job by id:" + jobId)); log.info("[Job-{}] try to run job in app[{}], instanceParams={},delay={} ms.", jobInfo.getId(), appId, instanceParams, delay); - Long instanceId = instanceService.create(jobInfo.getId(), jobInfo.getAppId(), jobInfo.getJobParams(), instanceParams, null, System.currentTimeMillis() + Math.max(delay, 0)); + final InstanceInfoDO instanceInfo = instanceService.create(jobInfo.getId(), jobInfo.getAppId(), jobInfo.getJobParams(), instanceParams, null, System.currentTimeMillis() + Math.max(delay, 0)); instanceInfoRepository.flush(); if (delay <= 0) { - dispatchService.dispatch(jobInfo, instanceId); + dispatchService.dispatch(jobInfo, instanceInfo.getInstanceId(), Optional.of(instanceInfo),Optional.empty()); } else { - InstanceTimeWheelService.schedule(instanceId, delay, () -> dispatchService.dispatch(jobInfo, instanceId)); + InstanceTimeWheelService.schedule(instanceInfo.getInstanceId(), delay, () -> dispatchService.dispatch(jobInfo, instanceInfo.getInstanceId(), Optional.empty(),Optional.empty())); } - log.info("[Job-{}|{}] execute 'runJob' successfully, params={}", jobInfo.getId(), instanceId, instanceParams); - return instanceId; + log.info("[Job-{}|{}] execute 'runJob' successfully, params={}", jobInfo.getId(), instanceInfo.getInstanceId(), instanceParams); + return instanceInfo.getInstanceId(); } @@ -205,9 +204,8 @@ public class JobService { * 启用某个任务 * * @param jobId 任务ID - * @throws ParseException 异常(CRON表达式错误) */ - public void enableJob(Long jobId) throws ParseException { + public void enableJob(Long jobId) { JobInfoDO jobInfoDO = jobInfoRepository.findById(jobId).orElseThrow(() -> new IllegalArgumentException("can't find job by jobId:" + jobId)); jobInfoDO.setStatus(SwitchableStatus.ENABLE.getV()); diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/UserService.java 
b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/UserService.java index 6af93504..6acea5bd 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/UserService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/service/UserService.java @@ -5,7 +5,7 @@ import tech.powerjob.server.persistence.remote.repository.UserInfoRepository; import com.google.common.base.Splitter; import com.google.common.collect.Lists; import org.springframework.stereotype.Service; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import javax.annotation.Resource; import java.util.Date; diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/uid/IdGenerateService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/uid/IdGenerateService.java index 2f42e087..d583f69b 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/uid/IdGenerateService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/uid/IdGenerateService.java @@ -18,11 +18,10 @@ import tech.powerjob.server.remote.server.self.ServerInfoService; public class IdGenerateService { private final SnowFlakeIdGenerator snowFlakeIdGenerator; + private static final int DATA_CENTER_ID = 0; - @Autowired public IdGenerateService(ServerInfoService serverInfoService) { - long id = serverInfoService.fetchServiceInfo().getId(); snowFlakeIdGenerator = new SnowFlakeIdGenerator(DATA_CENTER_ID, id); log.info("[IdGenerateService] initialize IdGenerateService successfully, ID:{}", id); diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/uid/SnowFlakeIdGenerator.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/uid/SnowFlakeIdGenerator.java index c700cd60..b3f70241 100644 --- 
a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/uid/SnowFlakeIdGenerator.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/uid/SnowFlakeIdGenerator.java @@ -7,37 +7,50 @@ package tech.powerjob.server.core.uid; * @since 2020/4/6 */ public class SnowFlakeIdGenerator { - /** * 起始的时间戳(a special day for me) */ private final static long START_STAMP = 1555776000000L; - /** - * 每一部分占用的位数 + * 序列号占用的位数 */ - private final static long SEQUENCE_BIT = 6; //序列号占用的位数 - private final static long MACHINE_BIT = 14; //机器标识占用的位数 - private final static long DATA_CENTER_BIT = 2;//数据中心占用的位数 - + private final static long SEQUENCE_BIT = 6; + /** + * 机器标识占用的位数 + */ + private final static long MACHINE_BIT = 14; + /** + * 数据中心占用的位数 + */ + private final static long DATA_CENTER_BIT = 2; /** * 每一部分的最大值 */ private final static long MAX_DATA_CENTER_NUM = ~(-1L << DATA_CENTER_BIT); private final static long MAX_MACHINE_NUM = ~(-1L << MACHINE_BIT); private final static long MAX_SEQUENCE = ~(-1L << SEQUENCE_BIT); - /** * 每一部分向左的位移 */ private final static long MACHINE_LEFT = SEQUENCE_BIT; private final static long DATA_CENTER_LEFT = SEQUENCE_BIT + MACHINE_BIT; private final static long TIMESTAMP_LEFT = DATA_CENTER_LEFT + DATA_CENTER_BIT; - - private final long dataCenterId; //数据中心 - private final long machineId; //机器标识 - private long sequence = 0L; //序列号 - private long lastTimestamp = -1L;//上一次时间戳 + /** + * 数据中心 + */ + private final long dataCenterId; + /** + * 机器标识 + */ + private final long machineId; + /** + * 序列号 + */ + private long sequence = 0L; + /** + * 上一次时间戳 + */ + private long lastTimestamp = -1L; public SnowFlakeIdGenerator(long dataCenterId, long machineId) { if (dataCenterId > MAX_DATA_CENTER_NUM || dataCenterId < 0) { @@ -56,7 +69,7 @@ public class SnowFlakeIdGenerator { public synchronized long nextId() { long currStamp = getNewStamp(); if (currStamp < lastTimestamp) { - throw new RuntimeException("clock moved 
backwards, refusing to generate id"); + return futureId(); } if (currStamp == lastTimestamp) { @@ -79,6 +92,22 @@ public class SnowFlakeIdGenerator { | sequence; //序列号部分 } + /** + * 发生时钟回拨时借用未来时间生成Id,避免运行过程中任务调度和工作流直接进入不可用状态 + * 注:该方式不可解决原算法中停服状态下时钟回拨导致的重复id问题 + */ + private long futureId() { + sequence = (sequence + 1) & MAX_SEQUENCE; + if (sequence == 0L) { + lastTimestamp = lastTimestamp + 1; + } + + return (lastTimestamp - START_STAMP) << TIMESTAMP_LEFT //时间戳部分 + | dataCenterId << DATA_CENTER_LEFT //数据中心部分 + | machineId << MACHINE_LEFT //机器标识部分 + | sequence; //序列号部分 + } + private long getNextMill() { long mill = getNewStamp(); while (mill <= lastTimestamp) { diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/validator/JobNodeValidator.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/validator/JobNodeValidator.java index e95a34e5..7765aec8 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/validator/JobNodeValidator.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/validator/JobNodeValidator.java @@ -1,5 +1,6 @@ package tech.powerjob.server.core.validator; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; import tech.powerjob.common.enums.WorkflowNodeType; @@ -18,10 +19,10 @@ import javax.annotation.Resource; */ @Component @Slf4j +@RequiredArgsConstructor public class JobNodeValidator implements NodeValidator { - @Resource - private JobInfoRepository jobInfoRepository; + private final JobInfoRepository jobInfoRepository; @Override public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) { diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/validator/NestedWorkflowNodeValidator.java 
b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/validator/NestedWorkflowNodeValidator.java index 08147e0c..0c22fa1b 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/validator/NestedWorkflowNodeValidator.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/validator/NestedWorkflowNodeValidator.java @@ -1,6 +1,7 @@ package tech.powerjob.server.core.validator; import com.alibaba.fastjson.JSON; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; import tech.powerjob.common.enums.WorkflowNodeType; @@ -13,7 +14,6 @@ import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO; import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository; import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository; -import javax.annotation.Resource; import java.util.Objects; import java.util.Optional; @@ -23,12 +23,12 @@ import java.util.Optional; */ @Component @Slf4j +@RequiredArgsConstructor public class NestedWorkflowNodeValidator implements NodeValidator { - @Resource - private WorkflowInfoRepository workflowInfoRepository; - @Resource - private WorkflowNodeInfoRepository workflowNodeInfoRepository; + private final WorkflowInfoRepository workflowInfoRepository; + + private final WorkflowNodeInfoRepository workflowNodeInfoRepository; @Override public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) { diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowInstanceManager.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowInstanceManager.java index 0cd9373c..68535a32 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowInstanceManager.java +++ 
b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowInstanceManager.java @@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.TypeReference; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.springframework.beans.BeanUtils; @@ -18,6 +19,7 @@ import tech.powerjob.common.model.PEWorkflowDAG; import tech.powerjob.common.serialize.JsonUtils; import tech.powerjob.common.utils.CommonUtils; import tech.powerjob.server.common.constants.SwitchableStatus; +import tech.powerjob.server.common.utils.SpringUtils; import tech.powerjob.server.core.helper.StatusMappingHelper; import tech.powerjob.server.core.lock.UseCacheLock; import tech.powerjob.server.core.service.UserService; @@ -32,7 +34,6 @@ import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository; import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository; -import javax.annotation.Resource; import java.util.*; import java.util.stream.Collectors; @@ -47,25 +48,25 @@ import static tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils.isNo */ @Slf4j @Service +@RequiredArgsConstructor @SuppressWarnings("squid:S1192") public class WorkflowInstanceManager { - @Resource - private AlarmCenter alarmCenter; - @Resource - private IdGenerateService idGenerateService; - @Resource - private JobInfoRepository jobInfoRepository; - @Resource - private UserService userService; - @Resource - private WorkflowInfoRepository workflowInfoRepository; - @Resource - private WorkflowInstanceInfoRepository workflowInstanceInfoRepository; - @Resource - private WorkflowNodeInfoRepository workflowNodeInfoRepository; - @Resource - private WorkflowNodeHandleService 
workflowNodeHandleService; + private final AlarmCenter alarmCenter; + + private final IdGenerateService idGenerateService; + + private final JobInfoRepository jobInfoRepository; + + private final UserService userService; + + private final WorkflowInfoRepository workflowInfoRepository; + + private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository; + + private final WorkflowNodeInfoRepository workflowNodeInfoRepository; + + private final WorkflowNodeHandleService workflowNodeHandleService; /** * 创建工作流任务实例 @@ -440,10 +441,10 @@ public class WorkflowInstanceManager { if (workflowInstanceStatus == WorkflowInstanceStatus.SUCCEED){ HashMap wfContext = JSON.parseObject(wfInstance.getWfContext(), new TypeReference>() { }); - updateWorkflowContext(wfInstance.getParentWfInstanceId(),wfContext); + SpringUtils.getBean(this.getClass()).updateWorkflowContext(wfInstance.getParentWfInstanceId(), wfContext); } - // 处理父工作流 - move(wfInstance.getParentWfInstanceId(), wfInstance.getWfInstanceId(), StatusMappingHelper.toInstanceStatus(workflowInstanceStatus), result); + // 处理父工作流, fix https://github.com/PowerJob/PowerJob/issues/465 + SpringUtils.getBean(this.getClass()).move(wfInstance.getParentWfInstanceId(), wfInstance.getWfInstanceId(), StatusMappingHelper.toInstanceStatus(workflowInstanceStatus), result); } // 报警 diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowInstanceService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowInstanceService.java index fae06d15..047a706d 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowInstanceService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowInstanceService.java @@ -1,14 +1,20 @@ package tech.powerjob.server.core.workflow; import com.alibaba.fastjson.JSON; +import lombok.RequiredArgsConstructor; +import 
lombok.extern.slf4j.Slf4j; +import org.springframework.beans.BeanUtils; +import org.springframework.stereotype.Service; +import tech.powerjob.common.SystemInstanceResult; import tech.powerjob.common.enums.InstanceStatus; +import tech.powerjob.common.enums.WorkflowInstanceStatus; import tech.powerjob.common.enums.WorkflowNodeType; import tech.powerjob.common.exception.PowerJobException; -import tech.powerjob.common.SystemInstanceResult; -import tech.powerjob.common.enums.WorkflowInstanceStatus; import tech.powerjob.common.model.PEWorkflowDAG; import tech.powerjob.common.response.WorkflowInstanceInfoDTO; import tech.powerjob.server.common.constants.SwitchableStatus; +import tech.powerjob.server.common.utils.SpringUtils; +import tech.powerjob.server.core.instance.InstanceService; import tech.powerjob.server.core.lock.UseCacheLock; import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils; import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO; @@ -16,12 +22,7 @@ import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO; import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository; import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository; import tech.powerjob.server.remote.server.redirector.DesignateServer; -import tech.powerjob.server.core.instance.InstanceService; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.BeanUtils; -import org.springframework.stereotype.Service; -import javax.annotation.Resource; import java.util.Date; import java.util.Objects; import java.util.Optional; @@ -35,18 +36,16 @@ import java.util.Optional; */ @Slf4j @Service +@RequiredArgsConstructor public class WorkflowInstanceService { - @Resource - private InstanceService instanceService; - @Resource - private WorkflowInstanceInfoRepository wfInstanceInfoRepository; - @Resource - private WorkflowInstanceManager workflowInstanceManager; - @Resource - private WorkflowInfoRepository 
workflowInfoRepository; - @Resource - private WorkflowInstanceService self; + private final InstanceService instanceService; + + private final WorkflowInstanceInfoRepository wfInstanceInfoRepository; + + private final WorkflowInstanceManager workflowInstanceManager; + + private final WorkflowInfoRepository workflowInfoRepository; /** * 停止工作流实例(入口) @@ -61,10 +60,10 @@ public class WorkflowInstanceService { } // 如果这是一个被嵌套的工作流,则终止父工作流 if (wfInstance.getParentWfInstanceId() != null) { - self.stopWorkflowInstance(wfInstance.getParentWfInstanceId(), appId); + SpringUtils.getBean(this.getClass()).stopWorkflowInstance(wfInstance.getParentWfInstanceId(), appId); return; } - self.stopWorkflowInstance(wfInstanceId, appId); + SpringUtils.getBean(this.getClass()).stopWorkflowInstance(wfInstanceId, appId); } /** diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowService.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowService.java index 9db75a07..3a86cfb5 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowService.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/WorkflowService.java @@ -7,7 +7,7 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.beans.BeanUtils; import org.springframework.stereotype.Service; import org.springframework.util.CollectionUtils; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.model.LifeCycle; diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/hanlder/impl/JobNodeHandler.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/hanlder/impl/JobNodeHandler.java index 
531123e9..a3fc30ed 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/hanlder/impl/JobNodeHandler.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/hanlder/impl/JobNodeHandler.java @@ -1,11 +1,13 @@ package tech.powerjob.server.core.workflow.hanlder.impl; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; import tech.powerjob.common.enums.InstanceStatus; import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.enums.WorkflowNodeType; import tech.powerjob.common.model.PEWorkflowDAG; +import tech.powerjob.server.common.utils.SpringUtils; import tech.powerjob.server.core.DispatchService; import tech.powerjob.server.core.instance.InstanceService; import tech.powerjob.server.core.workflow.hanlder.TaskNodeHandler; @@ -13,7 +15,7 @@ import tech.powerjob.server.persistence.remote.model.JobInfoDO; import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO; import tech.powerjob.server.persistence.remote.repository.JobInfoRepository; -import javax.annotation.Resource; +import java.util.Optional; /** * @author Echo009 @@ -21,21 +23,15 @@ import javax.annotation.Resource; */ @Slf4j @Component +@RequiredArgsConstructor public class JobNodeHandler implements TaskNodeHandler { - @Resource - private InstanceService instanceService; - - @Resource - private JobInfoRepository jobInfoRepository; - - @Resource - private DispatchService dispatchService; + private final JobInfoRepository jobInfoRepository; @Override public void createTaskInstance(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) { // instanceParam 传递的是工作流实例的 wfContext - Long instanceId = instanceService.create(node.getJobId(), wfInstanceInfo.getAppId(), node.getNodeParams(), wfInstanceInfo.getWfContext(), wfInstanceInfo.getWfInstanceId(), System.currentTimeMillis()); + Long instanceId 
= SpringUtils.getBean(InstanceService.class).create(node.getJobId(), wfInstanceInfo.getAppId(), node.getNodeParams(), wfInstanceInfo.getWfContext(), wfInstanceInfo.getWfInstanceId(), System.currentTimeMillis()).getInstanceId(); node.setInstanceId(instanceId); node.setStatus(InstanceStatus.RUNNING.getV()); log.info("[Workflow-{}|{}] create readyNode(JOB) instance(nodeId={},jobId={},instanceId={}) successfully~", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getJobId(), instanceId); @@ -46,7 +42,7 @@ public class JobNodeHandler implements TaskNodeHandler { JobInfoDO jobInfo = jobInfoRepository.findById(node.getJobId()).orElseGet(JobInfoDO::new); // 洗去时间表达式类型 jobInfo.setTimeExpressionType(TimeExpressionType.WORKFLOW.getV()); - dispatchService.dispatch(jobInfo, node.getInstanceId()); + SpringUtils.getBean(DispatchService.class).dispatch(jobInfo, node.getInstanceId(), Optional.empty(), Optional.empty()); } @Override diff --git a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/hanlder/impl/NestedWorkflowNodeHandler.java b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/hanlder/impl/NestedWorkflowNodeHandler.java index e7d0de99..b08413a6 100644 --- a/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/hanlder/impl/NestedWorkflowNodeHandler.java +++ b/powerjob-server/powerjob-server-core/src/main/java/tech/powerjob/server/core/workflow/hanlder/impl/NestedWorkflowNodeHandler.java @@ -1,6 +1,7 @@ package tech.powerjob.server.core.workflow.hanlder.impl; import com.alibaba.fastjson.JSON; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; import tech.powerjob.common.SystemInstanceResult; @@ -11,6 +12,7 @@ import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.model.PEWorkflowDAG; import tech.powerjob.common.utils.CommonUtils; 
import tech.powerjob.server.common.constants.SwitchableStatus; +import tech.powerjob.server.common.utils.SpringUtils; import tech.powerjob.server.core.workflow.WorkflowInstanceManager; import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils; import tech.powerjob.server.core.workflow.hanlder.TaskNodeHandler; @@ -19,7 +21,6 @@ import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO; import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository; import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository; -import javax.annotation.Resource; import java.util.Date; /** @@ -28,16 +29,12 @@ import java.util.Date; */ @Component @Slf4j +@RequiredArgsConstructor public class NestedWorkflowNodeHandler implements TaskNodeHandler { - @Resource - private WorkflowInfoRepository workflowInfoRepository; + private final WorkflowInfoRepository workflowInfoRepository; - @Resource - private WorkflowInstanceInfoRepository workflowInstanceInfoRepository; - - @Resource - private WorkflowInstanceManager workflowInstanceManager; + private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository; @Override public void createTaskInstance(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) { @@ -78,7 +75,7 @@ public class NestedWorkflowNodeHandler implements TaskNodeHandler { } else { // 透传当前的上下文创建新的工作流实例 String wfContext = wfInstanceInfo.getWfContext(); - Long instanceId = workflowInstanceManager.create(targetWf, wfContext, System.currentTimeMillis(), wfInstanceInfo.getWfInstanceId()); + Long instanceId = SpringUtils.getBean(WorkflowInstanceManager.class).create(targetWf, wfContext, System.currentTimeMillis(), wfInstanceInfo.getWfInstanceId()); node.setInstanceId(instanceId); } node.setStartTime(CommonUtils.formatTime(System.currentTimeMillis())); @@ -89,7 +86,7 @@ public class NestedWorkflowNodeHandler implements TaskNodeHandler { public void 
startTaskInstance(PEWorkflowDAG.Node node) { Long wfId = node.getJobId(); WorkflowInfoDO targetWf = workflowInfoRepository.findById(wfId).orElse(null); - workflowInstanceManager.start(targetWf, node.getInstanceId()); + SpringUtils.getBean(WorkflowInstanceManager.class).start(targetWf, node.getInstanceId()); } @Override diff --git a/powerjob-server/powerjob-server-extension/pom.xml b/powerjob-server/powerjob-server-extension/pom.xml index 7a82f1c5..29ab5693 100644 --- a/powerjob-server/powerjob-server-extension/pom.xml +++ b/powerjob-server/powerjob-server-extension/pom.xml @@ -5,7 +5,7 @@ powerjob-server tech.powerjob - 4.2.0 + 4.2.1 ../pom.xml 4.0.0 diff --git a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/DatabaseLockService.java b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/DatabaseLockService.java index d329e96b..15a3d91e 100644 --- a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/DatabaseLockService.java +++ b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/DatabaseLockService.java @@ -21,6 +21,7 @@ import org.springframework.stereotype.Service; public class DatabaseLockService implements LockService { private final String ownerIp; + private final OmsLockRepository omsLockRepository; @Autowired diff --git a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/AlarmCenter.java b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/AlarmCenter.java index 48914cf5..118e5d41 100644 --- a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/AlarmCenter.java +++ b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/AlarmCenter.java @@ -24,9 +24,9 @@ 
import java.util.concurrent.*; public class AlarmCenter { private final ExecutorService POOL; + private final List BEANS = Lists.newLinkedList(); - @Autowired public AlarmCenter(List alarmables) { int cores = Runtime.getRuntime().availableProcessors(); ThreadFactory factory = new ThreadFactoryBuilder().setNameFormat("AlarmPool-%d").build(); diff --git a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/DingTalkAlarmService.java b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/DingTalkAlarmService.java index ab705c0a..b42d9c05 100644 --- a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/DingTalkAlarmService.java +++ b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/DingTalkAlarmService.java @@ -1,5 +1,6 @@ package tech.powerjob.server.extension.defaultimpl.alarm.impl; +import lombok.RequiredArgsConstructor; import tech.powerjob.common.OmsConstant; import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.utils.NetUtils; @@ -30,17 +31,19 @@ import java.util.Set; */ @Slf4j @Service +@RequiredArgsConstructor public class DingTalkAlarmService implements Alarmable { - @Resource - private Environment environment; + private final Environment environment; private Long agentId; private DingTalkUtils dingTalkUtils; private Cache mobile2UserIdCache; private static final int CACHE_SIZE = 8192; - // 防止缓存击穿 + /** + * 防止缓存击穿 + */ private static final String EMPTY_TAG = "EMPTY"; @Override diff --git a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/DingTalkUtils.java b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/DingTalkUtils.java index 39425c10..e2cd77f0 100644 --- 
a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/DingTalkUtils.java +++ b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/DingTalkUtils.java @@ -125,7 +125,7 @@ public class DingTalkUtils implements Closeable { @AllArgsConstructor public static final class MarkdownEntity { - private String title; - private String detail; + private final String title; + private final String detail; } } diff --git a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/MailAlarmService.java b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/MailAlarmService.java index ff66219a..d9438879 100644 --- a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/MailAlarmService.java +++ b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/MailAlarmService.java @@ -1,5 +1,7 @@ package tech.powerjob.server.extension.defaultimpl.alarm.impl; +import org.springframework.beans.factory.annotation.Value; +import org.apache.commons.lang3.StringUtils; import tech.powerjob.server.persistence.remote.model.UserInfoDO; import tech.powerjob.server.extension.defaultimpl.alarm.module.Alarm; import tech.powerjob.server.extension.Alarmable; @@ -10,7 +12,6 @@ import org.springframework.mail.SimpleMailMessage; import org.springframework.mail.javamail.JavaMailSender; import org.springframework.stereotype.Service; import org.springframework.util.CollectionUtils; -import org.springframework.util.StringUtils; import javax.annotation.Resource; import java.util.List; @@ -31,12 +32,11 @@ public class MailAlarmService implements Alarmable { private JavaMailSender javaMailSender; + @Value("${spring.mail.username:}") private String from; - private static final String 
FROM_KEY = "spring.mail.username"; @Override public void onFailed(Alarm alarm, List targetUserList) { - initFrom(); if (CollectionUtils.isEmpty(targetUserList) || javaMailSender == null || StringUtils.isEmpty(from)) { return; } @@ -59,10 +59,4 @@ public class MailAlarmService implements Alarmable { this.javaMailSender = javaMailSender; } - // 不能直接使用 @Value 注入,不存在的时候会报错 - private void initFrom() { - if (StringUtils.isEmpty(from)) { - from = environment.getProperty(FROM_KEY); - } - } } diff --git a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/WebHookAlarmService.java b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/WebHookAlarmService.java index a36b9000..7c46dc2f 100644 --- a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/WebHookAlarmService.java +++ b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/impl/WebHookAlarmService.java @@ -11,7 +11,7 @@ import okhttp3.MediaType; import okhttp3.RequestBody; import org.springframework.stereotype.Service; import org.springframework.util.CollectionUtils; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.util.List; diff --git a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/module/JobInstanceAlarm.java b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/module/JobInstanceAlarm.java index 13e091ec..4924966e 100644 --- a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/module/JobInstanceAlarm.java +++ b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/module/JobInstanceAlarm.java @@ -10,38 +10,70 @@ import 
lombok.Data; */ @Data public class JobInstanceAlarm implements Alarm { - // 应用ID + /** + * 应用ID + */ private long appId; - // 任务ID + /** + * 任务ID + */ private long jobId; - // 任务实例ID + /** + * 任务实例ID + */ private long instanceId; - // 任务名称 + /** + * 任务名称 + */ private String jobName; - // 任务自带的参数 + /** + * 任务自带的参数 + */ private String jobParams; - // 时间表达式类型(CRON/API/FIX_RATE/FIX_DELAY) + /** + * 时间表达式类型(CRON/API/FIX_RATE/FIX_DELAY) + */ private Integer timeExpressionType; - // 时间表达式,CRON/NULL/LONG/LONG + /** + * 时间表达式,CRON/NULL/LONG/LONG + */ private String timeExpression; - // 执行类型,单机/广播/MR + /** + * 执行类型,单机/广播/MR + */ private Integer executeType; - // 执行器类型,Java/Shell + /** + * 执行器类型,Java/Shell + */ private Integer processorType; - // 执行器信息 + /** + * 执行器信息 + */ private String processorInfo; - // 任务实例参数 + /** + * 任务实例参数 + */ private String instanceParams; - // 执行结果 + /** + * 执行结果 + */ private String result; - // 预计触发时间 + /** + * 预计触发时间 + */ private Long expectedTriggerTime; - // 实际触发时间 + /** + * 实际触发时间 + */ private Long actualTriggerTime; - // 结束时间 + /** + * 结束时间 + */ private Long finishedTime; - // TaskTracker地址 + /** + * TaskTracker address + */ private String taskTrackerAddress; @Override diff --git a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/module/WorkflowInstanceAlarm.java b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/module/WorkflowInstanceAlarm.java index 3435de1c..9639dc54 100644 --- a/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/module/WorkflowInstanceAlarm.java +++ b/powerjob-server/powerjob-server-extension/src/main/java/tech/powerjob/server/extension/defaultimpl/alarm/module/WorkflowInstanceAlarm.java @@ -14,25 +14,39 @@ public class WorkflowInstanceAlarm implements Alarm { private String workflowName; - // 任务所属应用的ID,冗余提高查询效率 + /** + * 任务所属应用的ID,冗余提高查询效率 + */ private Long appId; private Long 
workflowId; - // workflowInstanceId(任务实例表都使用单独的ID作为主键以支持潜在的分表需求) + /** + * workflowInstanceId(任务实例表都使用单独的ID作为主键以支持潜在的分表需求) + */ private Long wfInstanceId; - // workflow 状态(WorkflowInstanceStatus) + /** + * workflow 状态(WorkflowInstanceStatus) + */ private Integer status; private PEWorkflowDAG peWorkflowDAG; private String result; - // 实际触发时间 + /** + * 实际触发时间 + */ private Long actualTriggerTime; - // 结束时间 + /** + * 结束时间 + */ private Long finishedTime; - // 时间表达式类型(CRON/API/FIX_RATE/FIX_DELAY) + /** + * 时间表达式类型(CRON/API/FIX_RATE/FIX_DELAY) + */ private Integer timeExpressionType; - // 时间表达式,CRON/NULL/LONG/LONG + /** + * 时间表达式,CRON/NULL/LONG/LONG + */ private String timeExpression; @Override diff --git a/powerjob-server/powerjob-server-migrate/pom.xml b/powerjob-server/powerjob-server-migrate/pom.xml index 50c43813..9b8b32b6 100644 --- a/powerjob-server/powerjob-server-migrate/pom.xml +++ b/powerjob-server/powerjob-server-migrate/pom.xml @@ -5,7 +5,7 @@ powerjob-server tech.powerjob - 4.2.0 + 4.2.1 ../pom.xml 4.0.0 diff --git a/powerjob-server/powerjob-server-migrate/src/main/java/tech/powerjob/server/migrate/V3ToV4MigrateService.java b/powerjob-server/powerjob-server-migrate/src/main/java/tech/powerjob/server/migrate/V3ToV4MigrateService.java index 3a07857f..b9781f9d 100644 --- a/powerjob-server/powerjob-server-migrate/src/main/java/tech/powerjob/server/migrate/V3ToV4MigrateService.java +++ b/powerjob-server/powerjob-server-migrate/src/main/java/tech/powerjob/server/migrate/V3ToV4MigrateService.java @@ -2,9 +2,11 @@ package tech.powerjob.server.migrate; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; +import lombok.RequiredArgsConstructor; import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.enums.ProcessorType; import tech.powerjob.common.model.PEWorkflowDAG; +import tech.powerjob.server.common.utils.SpringUtils; import tech.powerjob.server.extension.LockService; import 
tech.powerjob.server.persistence.remote.model.JobInfoDO; import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO; @@ -20,7 +22,6 @@ import org.springframework.data.jpa.domain.Specification; import org.springframework.stereotype.Service; import org.springframework.util.CollectionUtils; -import javax.annotation.Resource; import javax.persistence.criteria.Predicate; import javax.transaction.Transactional; import java.util.*; @@ -35,23 +36,18 @@ import java.util.concurrent.TimeUnit; */ @Service @Slf4j +@RequiredArgsConstructor public class V3ToV4MigrateService { private static final String MIGRATE_LOCK_TEMPLATE = "v3to4MigrateLock-%s-%s"; - @Resource - private LockService lockService; - @Resource - private JobInfoRepository jobInfoRepository; - @Resource - private WorkflowInfoRepository workflowInfoRepository; - @Resource - private WorkflowNodeInfoRepository workflowNodeInfoRepository; - /** - * 避免内部方法调用导致事务不生效 - */ - @Resource - private V3ToV4MigrateService self; + private final LockService lockService; + + private final JobInfoRepository jobInfoRepository; + + private final WorkflowInfoRepository workflowInfoRepository; + + private final WorkflowNodeInfoRepository workflowNodeInfoRepository; /* ********************** 3.x => 4.x ********************** */ @@ -149,7 +145,7 @@ public class V3ToV4MigrateService { for (WorkflowInfoDO workflowInfo : workflowInfoList) { try { - boolean fixed = self.fixWorkflowInfoCoreFromV3ToV4(workflowInfo, jobId2NodeIdMap); + boolean fixed = SpringUtils.getBean(this.getClass()).fixWorkflowInfoCoreFromV3ToV4(workflowInfo, jobId2NodeIdMap); if (fixed) { fixedWorkflowIds.add(workflowInfo.getId()); } diff --git a/powerjob-server/powerjob-server-monitor/pom.xml b/powerjob-server/powerjob-server-monitor/pom.xml index 23c32a93..c54bb0df 100644 --- a/powerjob-server/powerjob-server-monitor/pom.xml +++ b/powerjob-server/powerjob-server-monitor/pom.xml @@ -5,7 +5,7 @@ powerjob-server tech.powerjob - 4.2.0 + 4.2.1 ../pom.xml 4.0.0 diff 
--git a/powerjob-server/powerjob-server-monitor/src/main/java/tech/powerjob/server/monitor/PowerJobMonitorService.java b/powerjob-server/powerjob-server-monitor/src/main/java/tech/powerjob/server/monitor/PowerJobMonitorService.java index 90caaa5e..9969a94d 100644 --- a/powerjob-server/powerjob-server-monitor/src/main/java/tech/powerjob/server/monitor/PowerJobMonitorService.java +++ b/powerjob-server/powerjob-server-monitor/src/main/java/tech/powerjob/server/monitor/PowerJobMonitorService.java @@ -2,7 +2,6 @@ package tech.powerjob.server.monitor; import com.google.common.collect.Lists; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import java.util.List; @@ -19,9 +18,7 @@ public class PowerJobMonitorService implements MonitorService { private final List monitors = Lists.newLinkedList(); - @Autowired public PowerJobMonitorService(List monitors) { - monitors.forEach(m -> { log.info("[MonitorService] register monitor: {}", m.getClass().getName()); this.monitors.add(m); diff --git a/powerjob-server/powerjob-server-persistence/pom.xml b/powerjob-server/powerjob-server-persistence/pom.xml index 9459b301..a5e6b9f4 100644 --- a/powerjob-server/powerjob-server-persistence/pom.xml +++ b/powerjob-server/powerjob-server-persistence/pom.xml @@ -5,7 +5,7 @@ powerjob-server tech.powerjob - 4.2.0 + 4.2.1 ../pom.xml 4.0.0 diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/LocalJpaConfig.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/LocalJpaConfig.java index c33f0b9e..215ef406 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/LocalJpaConfig.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/LocalJpaConfig.java @@ -7,6 +7,7 @@ import 
org.springframework.boot.autoconfigure.orm.jpa.JpaProperties; import org.springframework.boot.orm.jpa.EntityManagerFactoryBuilder; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.DependsOn; import org.springframework.data.jpa.repository.config.EnableJpaRepositories; import org.springframework.orm.jpa.JpaTransactionManager; import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean; @@ -38,9 +39,6 @@ import java.util.Objects; ) public class LocalJpaConfig { - @Resource(name = "omsLocalDatasource") - private DataSource omsLocalDatasource; - public static final String LOCAL_PACKAGES = "tech.powerjob.server.persistence.local"; private static Map genDatasourceProperties() { @@ -56,8 +54,7 @@ public class LocalJpaConfig { } @Bean(name = "localEntityManagerFactory") - public LocalContainerEntityManagerFactoryBean initLocalEntityManagerFactory(EntityManagerFactoryBuilder builder) { - + public LocalContainerEntityManagerFactoryBean initLocalEntityManagerFactory(@Qualifier("omsLocalDatasource") DataSource omsLocalDatasource,EntityManagerFactoryBuilder builder) { return builder .dataSource(omsLocalDatasource) .properties(genDatasourceProperties()) @@ -66,10 +63,9 @@ public class LocalJpaConfig { .build(); } - @Bean(name = "localTransactionManager") - public PlatformTransactionManager initLocalTransactionManager(EntityManagerFactoryBuilder builder) { - return new JpaTransactionManager(Objects.requireNonNull(initLocalEntityManagerFactory(builder).getObject())); + public PlatformTransactionManager initLocalTransactionManager(@Qualifier("localEntityManagerFactory") LocalContainerEntityManagerFactoryBean localContainerEntityManagerFactoryBean) { + return new JpaTransactionManager(Objects.requireNonNull(localContainerEntityManagerFactoryBean.getObject())); } @Bean(name = "localTransactionTemplate") diff --git 
a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/PowerJobPhysicalNamingStrategy.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/PowerJobPhysicalNamingStrategy.java index 2bd7ca04..d0488c01 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/PowerJobPhysicalNamingStrategy.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/PowerJobPhysicalNamingStrategy.java @@ -5,7 +5,7 @@ import tech.powerjob.server.common.utils.PropertyUtils; import org.hibernate.boot.model.naming.Identifier; import org.hibernate.engine.jdbc.env.spi.JdbcEnvironment; import org.springframework.boot.orm.jpa.hibernate.SpringPhysicalNamingStrategy; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.io.Serializable; @@ -39,7 +39,7 @@ public class PowerJobPhysicalNamingStrategy extends SpringPhysicalNamingStrategy String text = name.getText(); String noDOText = StringUtils.endsWithIgnoreCase(text, "do") ? text.substring(0, text.length() - 2) : text; - String newText = StringUtils.hasLength(tablePrefix) ? tablePrefix + noDOText : noDOText; + String newText = StringUtils.isNotEmpty(tablePrefix) ? 
tablePrefix + noDOText : noDOText; return super.toPhysicalTableName(new Identifier(newText, name.isQuoted()), jdbcEnvironment); } diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/RemoteJpaConfig.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/RemoteJpaConfig.java index 3bc52603..fb784152 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/RemoteJpaConfig.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/config/RemoteJpaConfig.java @@ -1,5 +1,6 @@ package tech.powerjob.server.persistence.config; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.autoconfigure.orm.jpa.HibernateProperties; import org.springframework.boot.autoconfigure.orm.jpa.HibernateSettings; import org.springframework.boot.autoconfigure.orm.jpa.JpaProperties; @@ -13,7 +14,6 @@ import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean; import org.springframework.transaction.PlatformTransactionManager; import org.springframework.transaction.annotation.EnableTransactionManagement; -import javax.annotation.Resource; import javax.sql.DataSource; import java.util.Map; import java.util.Objects; @@ -36,12 +36,6 @@ import java.util.Objects; ) public class RemoteJpaConfig { - @Resource(name = "omsRemoteDatasource") - private DataSource omsRemoteDatasource; - - @Resource(name = "multiDatasourceProperties") - private MultiDatasourceProperties properties; - public static final String CORE_PACKAGES = "tech.powerjob.server.persistence.remote"; /** @@ -69,7 +63,7 @@ public class RemoteJpaConfig { @Primary @Bean(name = "remoteEntityManagerFactory") - public LocalContainerEntityManagerFactoryBean initRemoteEntityManagerFactory(EntityManagerFactoryBuilder builder) { + public LocalContainerEntityManagerFactoryBean 
initRemoteEntityManagerFactory(@Qualifier("omsRemoteDatasource") DataSource omsRemoteDatasource,@Qualifier("multiDatasourceProperties") MultiDatasourceProperties properties, EntityManagerFactoryBuilder builder) { Map datasourceProperties = genDatasourceProperties(); datasourceProperties.putAll(properties.getRemote().getHibernate().getProperties()); return builder @@ -83,7 +77,7 @@ public class RemoteJpaConfig { @Primary @Bean(name = "remoteTransactionManager") - public PlatformTransactionManager initRemoteTransactionManager(EntityManagerFactoryBuilder builder) { - return new JpaTransactionManager(Objects.requireNonNull(initRemoteEntityManagerFactory(builder).getObject())); + public PlatformTransactionManager initRemoteTransactionManager(@Qualifier("remoteEntityManagerFactory") LocalContainerEntityManagerFactoryBean localContainerEntityManagerFactoryBean) { + return new JpaTransactionManager(Objects.requireNonNull(localContainerEntityManagerFactoryBean.getObject())); } } diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/local/LocalInstanceLogRepository.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/local/LocalInstanceLogRepository.java index cd8057f7..ab72a745 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/local/LocalInstanceLogRepository.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/local/LocalInstanceLogRepository.java @@ -16,16 +16,20 @@ import java.util.stream.Stream; */ public interface LocalInstanceLogRepository extends JpaRepository { - // 流式查询 + /** + * 流式查询 + */ Stream findByInstanceIdOrderByLogTime(Long instanceId); - // 删除数据 + /** + * 删除数据 + */ @Modifying - @Transactional + @Transactional(rollbackOn = Exception.class) long deleteByInstanceId(Long instanceId); @Modifying - @Transactional + @Transactional(rollbackOn = Exception.class) 
@CanIgnoreReturnValue long deleteByInstanceIdInAndLogTimeLessThan(List instanceIds, Long t); diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/mongodb/GridFsManager.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/mongodb/GridFsManager.java index 9a607ad5..d275a0fd 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/mongodb/GridFsManager.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/mongodb/GridFsManager.java @@ -1,6 +1,5 @@ package tech.powerjob.server.persistence.mongodb; -import tech.powerjob.server.common.PowerJobServerConfigKey; import com.google.common.base.Stopwatch; import com.google.common.collect.Maps; import com.mongodb.client.MongoDatabase; @@ -19,8 +18,8 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.env.Environment; import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.stereotype.Service; +import tech.powerjob.server.common.PowerJobServerConfigKey; -import javax.annotation.Resource; import java.io.*; import java.util.Date; import java.util.Map; @@ -36,21 +35,24 @@ import java.util.function.Consumer; @Service public class GridFsManager implements InitializingBean { - @Resource - private Environment environment; + private final Environment environment; + + private final MongoDatabase db; - private MongoDatabase db; private boolean available; private final Map bucketCache = Maps.newConcurrentMap(); public static final String LOG_BUCKET = "log"; + public static final String CONTAINER_BUCKET = "container"; - @Autowired(required = false) - public void setMongoTemplate(MongoTemplate mongoTemplate) { + public GridFsManager(Environment environment, @Autowired(required = false) MongoTemplate mongoTemplate) { + this.environment = environment; if (mongoTemplate != null) { 
this.db = mongoTemplate.getDb(); + } else { + this.db = null; } } diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/monitor/DatabaseMonitorAspect.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/monitor/DatabaseMonitorAspect.java index 6d5adb49..8be9a4aa 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/monitor/DatabaseMonitorAspect.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/monitor/DatabaseMonitorAspect.java @@ -1,5 +1,6 @@ package tech.powerjob.server.persistence.monitor; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.aspectj.lang.ProceedingJoinPoint; import org.aspectj.lang.annotation.Around; @@ -11,7 +12,6 @@ import tech.powerjob.server.monitor.MonitorService; import tech.powerjob.server.monitor.events.db.DatabaseEvent; import tech.powerjob.server.monitor.events.db.DatabaseType; -import javax.annotation.Resource; import java.util.Collection; import java.util.Optional; import java.util.stream.Stream; @@ -25,10 +25,10 @@ import java.util.stream.Stream; @Slf4j @Aspect @Component +@RequiredArgsConstructor public class DatabaseMonitorAspect { - @Resource - private MonitorService monitorService; + private final MonitorService monitorService; @Around("execution(* tech.powerjob.server.persistence.remote.repository..*.*(..))") public Object monitorCoreDB(ProceedingJoinPoint joinPoint) throws Throwable { diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/AppInfoDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/AppInfoDO.java index ec5c1fe1..7395af38 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/AppInfoDO.java +++ 
b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/AppInfoDO.java @@ -14,7 +14,7 @@ import java.util.Date; */ @Data @Entity -@Table(uniqueConstraints = {@UniqueConstraint(name = "appNameUK", columnNames = {"appName"})}) +@Table(uniqueConstraints = {@UniqueConstraint(name = "uidx01_app_info", columnNames = {"appName"})}) public class AppInfoDO { @Id diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/ContainerInfoDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/ContainerInfoDO.java index 8bc09eea..b1fcd3ce 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/ContainerInfoDO.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/ContainerInfoDO.java @@ -14,7 +14,7 @@ import java.util.Date; */ @Data @Entity -@Table(indexes = {@Index(columnList = "appId")}) +@Table(indexes = {@Index(name = "idx01_container_info", columnList = "appId")}) public class ContainerInfoDO { @Id diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/InstanceInfoDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/InstanceInfoDO.java index 9c448074..c74e6546 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/InstanceInfoDO.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/InstanceInfoDO.java @@ -19,7 +19,11 @@ import java.util.Date; @Entity @NoArgsConstructor @AllArgsConstructor -@Table(indexes = {@Index(columnList = "jobId"), @Index(columnList = "appId"), @Index(columnList = "instanceId")}) +@Table(indexes = { + @Index(name = 
"idx01_instance_info", columnList = "jobId,status"), + @Index(name = "idx02_instance_info", columnList = "appId,status"), + @Index(name = "idx03_instance_info", columnList = "instanceId,status") +}) public class InstanceInfoDO { @Id @@ -40,6 +44,7 @@ public class InstanceInfoDO { private Long instanceId; /** * 任务参数(静态) + * * @since 2021/2/01 */ @Lob diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/JobInfoDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/JobInfoDO.java index 3fba4f45..a3b4bfbf 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/JobInfoDO.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/JobInfoDO.java @@ -19,7 +19,9 @@ import java.util.Date; @Entity @NoArgsConstructor @AllArgsConstructor -@Table(indexes = {@Index(columnList = "appId")}) +@Table(indexes = { + @Index(name = "idx01_job_info", columnList = "appId,status,timeExpressionType,nextTriggerTime"), +}) public class JobInfoDO { diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/OmsLockDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/OmsLockDO.java index 3f045c63..e6d1f7b3 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/OmsLockDO.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/OmsLockDO.java @@ -16,7 +16,7 @@ import java.util.Date; @Data @Entity @NoArgsConstructor -@Table(uniqueConstraints = {@UniqueConstraint(name = "lockNameUK", columnNames = {"lockName"})}) +@Table(uniqueConstraints = {@UniqueConstraint(name = "uidx01_oms_lock", columnNames = {"lockName"})}) public class 
OmsLockDO { @Id diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/ServerInfoDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/ServerInfoDO.java index 53c8a396..b274285f 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/ServerInfoDO.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/ServerInfoDO.java @@ -16,7 +16,10 @@ import java.util.Date; @Data @Entity @NoArgsConstructor -@Table(uniqueConstraints = {@UniqueConstraint(columnNames = "ip")}) +@Table( + uniqueConstraints = {@UniqueConstraint(name = "uidx01_server_info", columnNames = "ip")}, + indexes = {@Index(name = "idx01_server_info", columnList = "gmtModified")} +) public class ServerInfoDO { @Id diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/UserInfoDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/UserInfoDO.java index 307f4d2a..e19bef8c 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/UserInfoDO.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/UserInfoDO.java @@ -14,7 +14,10 @@ import java.util.Date; */ @Data @Entity -@Table +@Table(indexes = { + @Index(name = "uidx01_user_info", columnList = "username"), + @Index(name = "uidx02_user_info", columnList = "email") +}) public class UserInfoDO { @Id diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowInfoDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowInfoDO.java index d01f2ac2..72b5f679 100644 --- 
a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowInfoDO.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowInfoDO.java @@ -18,7 +18,9 @@ import java.util.Date; @Entity @NoArgsConstructor @AllArgsConstructor -@Table(indexes = {@Index(columnList = "appId")}) +@Table(indexes = { + @Index(name = "idx01_workflow_info",columnList = "appId,status,timeExpressionType,nextTriggerTime") +}) public class WorkflowInfoDO { @Id diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowInstanceInfoDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowInstanceInfoDO.java index 844b52c6..48625bca 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowInstanceInfoDO.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowInstanceInfoDO.java @@ -18,7 +18,13 @@ import java.util.Date; @Entity @NoArgsConstructor @AllArgsConstructor -@Table +@Table( + uniqueConstraints = {@UniqueConstraint(name = "uidx01_wf_instance", columnNames = {"wfInstanceId"})}, + indexes = { + @Index(name = "idx01_wf_instance", columnList = "workflowId,status"), + @Index(name = "idx02_wf_instance", columnList = "appId,status,expectedTriggerTime") + } +) public class WorkflowInstanceInfoDO { @Id diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowNodeInfoDO.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowNodeInfoDO.java index 0e51a330..755ca5d7 100644 --- 
a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowNodeInfoDO.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/WorkflowNodeInfoDO.java @@ -21,7 +21,9 @@ import java.util.Date; @Entity @NoArgsConstructor @AllArgsConstructor -@Table(indexes = {@Index(columnList = "appId"), @Index(columnList = "workflowId")}) +@Table(indexes = { + @Index(name = "idx01_workflow_node_info", columnList = "workflowId,gmtCreate") +}) public class WorkflowNodeInfoDO { @Id diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/brief/BriefInstanceInfo.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/brief/BriefInstanceInfo.java new file mode 100644 index 00000000..aa55448f --- /dev/null +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/model/brief/BriefInstanceInfo.java @@ -0,0 +1,45 @@ +package tech.powerjob.server.persistence.remote.model.brief; + + +import lombok.Data; + +/** + * @author Echo009 + * @since 2022/9/13 + */ +@Data +public class BriefInstanceInfo { + + + private Long appId; + + private Long id; + /** + * Job ID + */ + private Long jobId; + /** + * Instance ID + */ + private Long instanceId; + /** + * Total number of executions (used for retry decisions) + */ + private Long runningTimes; + + + public BriefInstanceInfo(Long appId, Long id, Long jobId, Long instanceId) { + this.appId = appId; + this.id = id; + this.jobId = jobId; + this.instanceId = instanceId; + } + + public BriefInstanceInfo(Long appId, Long id, Long jobId, Long instanceId, Long runningTimes) { + this.appId = appId; + this.id = id; + this.jobId = jobId; + this.instanceId = instanceId; + this.runningTimes = runningTimes; + } +} diff --git 
a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/AppInfoRepository.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/AppInfoRepository.java index 874915e1..a7e6d09d 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/AppInfoRepository.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/AppInfoRepository.java @@ -1,5 +1,7 @@ package tech.powerjob.server.persistence.remote.repository; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import tech.powerjob.server.persistence.remote.model.AppInfoDO; import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; @@ -25,4 +27,8 @@ public interface AppInfoRepository extends JpaRepository { * 其实只需要 id,处于性能考虑可以直接写SQL只返回ID */ List findAllByCurrentServer(String currentServer); + + @Query(value = "select id from AppInfoDO where currentServer = :currentServer") + List listAppIdByCurrentServer(@Param("currentServer")String currentServer); + } diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/InstanceInfoRepository.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/InstanceInfoRepository.java index d63a21fe..933f5fcc 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/InstanceInfoRepository.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/InstanceInfoRepository.java @@ -1,12 +1,14 @@ package tech.powerjob.server.persistence.remote.repository; -import 
tech.powerjob.server.persistence.remote.model.InstanceInfoDO; import com.google.errorprone.annotations.CanIgnoreReturnValue; +import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.JpaSpecificationExecutor; import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.query.Param; +import tech.powerjob.server.persistence.remote.model.InstanceInfoDO; +import tech.powerjob.server.persistence.remote.model.brief.BriefInstanceInfo; import javax.transaction.Transactional; import java.util.Date; @@ -23,10 +25,26 @@ public interface InstanceInfoRepository extends JpaRepository status); List findByJobIdAndStatusIn(long jobId, List status); + /** + * 更新状态变更信息 + * + * @param lastReportTime 最近一次上报时间 + * @param modifyTime 更新时间 + * @param runningTimes 运行次数 + * @param instanceId 实例 ID + * @param status 目标状态 + * @param oldStatus 旧状态 + * @return 更新记录数 + */ + @Transactional(rollbackOn = Exception.class) + @Modifying + @Query(value = "update InstanceInfoDO set lastReportTime = :lastReportTime, gmtModified = :modifyTime, runningTimes = :runningTimes, status = :status where instanceId = :instanceId and status = :oldStatus") + int updateStatusChangeInfoByInstanceIdAndStatus(@Param("lastReportTime") long lastReportTime, @Param("modifyTime") Date modifyTime, @Param("runningTimes") long runningTimes, @Param("status") int status, @Param("instanceId") long instanceId, @Param("oldStatus") int oldStatus); /** * 更新任务执行记录内容(DispatchService专用) @@ -46,6 +64,7 @@ public interface InstanceInfoRepository extends JpaRepository instanceIdList, @Param("originStatus") int originStatus, @Param("status") int status, @Param("modifyTime") Date modifyTime); /** * 更新固定频率任务的执行记录 @@ -77,19 +111,20 @@ public interface InstanceInfoRepository extends JpaRepository 
findAllByAppIdInAndStatusAndExpectedTriggerTimeLessThan(@Param("appIds") List appIds, @Param("status") int status, @Param("time") long time, Pageable pageable); - List findByAppIdInAndStatusAndExpectedTriggerTimeLessThan(List jobIds, int status, long time); + @Query(value = "select new tech.powerjob.server.persistence.remote.model.brief.BriefInstanceInfo(i.appId,i.id,i.jobId,i.instanceId) from InstanceInfoDO i where i.appId in (:appIds) and i.status = :status and i.actualTriggerTime < :time") + List selectBriefInfoByAppIdInAndStatusAndActualTriggerTimeLessThan(@Param("appIds") List appIds, @Param("status") int status, @Param("time") long time, Pageable pageable); - List findByAppIdInAndStatusAndActualTriggerTimeLessThan(List jobIds, int status, long time); - - List findByAppIdInAndStatusAndGmtModifiedBefore(List jobIds, int status, Date time); + @Query(value = "select new tech.powerjob.server.persistence.remote.model.brief.BriefInstanceInfo(i.appId,i.id,i.jobId,i.instanceId,i.runningTimes) from InstanceInfoDO i where i.appId in (:appIds) and i.status = :status and i.gmtModified < :time") + List selectBriefInfoByAppIdInAndStatusAndGmtModifiedBefore(@Param("appIds") List appIds, @Param("status") int status, @Param("time") Date time, Pageable pageable); InstanceInfoDO findByInstanceId(long instanceId); /* --数据统计-- */ + @Query(value = "select count(*) from InstanceInfoDO where appId = ?1 and status = ?2") long countByAppIdAndStatus(long appId, int status); long countByAppIdAndStatusAndGmtCreateAfter(long appId, int status, Date time); diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/JobInfoRepository.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/JobInfoRepository.java index ddd77e9a..a65efbf4 100644 --- 
a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/JobInfoRepository.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/JobInfoRepository.java @@ -7,6 +7,7 @@ import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.JpaSpecificationExecutor; import org.springframework.data.jpa.repository.Query; +import java.util.Collection; import java.util.List; import java.util.Set; @@ -44,4 +45,6 @@ public interface JobInfoRepository extends JpaRepository, JpaSp List findByAppId(Long appId); + List findByIdIn(Collection jobIds); + } diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/OmsLockRepository.java b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/OmsLockRepository.java index 64dedf28..54baa3a4 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/OmsLockRepository.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/OmsLockRepository.java @@ -16,13 +16,13 @@ import javax.transaction.Transactional; public interface OmsLockRepository extends JpaRepository { @Modifying - @Transactional + @Transactional(rollbackOn = Exception.class) @Query(value = "delete from OmsLockDO where lockName = ?1") int deleteByLockName(String lockName); OmsLockDO findByLockName(String lockName); @Modifying - @Transactional + @Transactional(rollbackOn = Exception.class) int deleteByOwnerIP(String ip); } diff --git a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/WorkflowInfoRepository.java 
b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/WorkflowInfoRepository.java index 7a673fc4..2c664d9b 100644 --- a/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/WorkflowInfoRepository.java +++ b/powerjob-server/powerjob-server-persistence/src/main/java/tech/powerjob/server/persistence/remote/repository/WorkflowInfoRepository.java @@ -20,13 +20,18 @@ public interface WorkflowInfoRepository extends JpaRepository findByAppId(Long appId); - /** 对外查询(list)三兄弟 */ + /** + * 对外查询(list)三兄弟 + */ Page findByAppIdAndStatusNot(Long appId, int nStatus, Pageable pageable); + Page findByIdAndStatusNot(Long id, int nStatus, Pageable pageable); + Page findByAppIdAndStatusNotAndWfNameLike(Long appId, int nStatus, String condition, Pageable pageable); } diff --git a/powerjob-server/powerjob-server-remote/pom.xml b/powerjob-server/powerjob-server-remote/pom.xml index 0391b8b5..878289d0 100644 --- a/powerjob-server/powerjob-server-remote/pom.xml +++ b/powerjob-server/powerjob-server-remote/pom.xml @@ -5,7 +5,7 @@ powerjob-server tech.powerjob - 4.2.0 + 4.2.1 ../pom.xml 4.0.0 diff --git a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/server/election/ServerElectionService.java b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/server/election/ServerElectionService.java index 1370949e..1db65201 100644 --- a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/server/election/ServerElectionService.java +++ b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/server/election/ServerElectionService.java @@ -37,20 +37,25 @@ import java.util.concurrent.TimeUnit; @Service public class ServerElectionService { - @Resource - private LockService lockService; - @Resource - private TransportService transportService; - @Resource - private AppInfoRepository 
appInfoRepository; + private final LockService lockService; - @Value("${oms.accurate.select.server.percentage}") - private int accurateSelectServerPercentage; + private final TransportService transportService; + + private final AppInfoRepository appInfoRepository; + + private final int accurateSelectServerPercentage; private static final int RETRY_TIMES = 10; private static final long PING_TIMEOUT_MS = 1000; private static final String SERVER_ELECT_LOCK = "server_elect_%d"; + public ServerElectionService(LockService lockService, TransportService transportService, AppInfoRepository appInfoRepository,@Value("${oms.accurate.select.server.percentage}") int accurateSelectServerPercentage) { + this.lockService = lockService; + this.transportService = transportService; + this.appInfoRepository = appInfoRepository; + this.accurateSelectServerPercentage = accurateSelectServerPercentage; + } + public String elect(Long appId, String protocol, String currentServer) { if (!accurate()) { // 如果是本机,就不需要查数据库那么复杂的操作了,直接返回成功 diff --git a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/server/redirector/DesignateServerAspect.java b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/server/redirector/DesignateServerAspect.java index 2d9278ce..9b5280b5 100644 --- a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/server/redirector/DesignateServerAspect.java +++ b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/server/redirector/DesignateServerAspect.java @@ -4,6 +4,7 @@ import akka.pattern.Patterns; import com.fasterxml.jackson.databind.JavaType; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.type.TypeFactory; +import lombok.RequiredArgsConstructor; import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.RemoteConstant; import tech.powerjob.common.response.AskResponse; @@ -39,10 +40,10 @@ 
import java.util.concurrent.CompletionStage; @Aspect @Component @Order(0) +@RequiredArgsConstructor public class DesignateServerAspect { - @Resource - private AppInfoRepository appInfoRepository; + private final AppInfoRepository appInfoRepository; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @@ -70,7 +71,7 @@ public class DesignateServerAspect { } if (appId == null) { - throw new PowerJobException("can't find appId in params for:" + signature.toString()); + throw new PowerJobException("can't find appId in params for:" + signature); } // 获取执行机器 diff --git a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/ClusterStatusHolder.java b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/ClusterStatusHolder.java index e104bd3c..98b9293d 100644 --- a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/ClusterStatusHolder.java +++ b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/ClusterStatusHolder.java @@ -21,11 +21,17 @@ import java.util.Map; @Slf4j public class ClusterStatusHolder { - // 集群所属的应用名称 + /** + * 集群所属的应用名称 + */ private final String appName; - // 集群中所有机器的信息 + /** + * 集群中所有机器的信息 + */ private final Map address2WorkerInfo; - // 集群中所有机器的容器部署状态 containerId -> (workerAddress -> containerInfo) + /** + * 集群中所有机器的容器部署状态 containerId -> (workerAddress -> containerInfo) + */ private Map> containerId2Infos; diff --git a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/WorkerClusterManagerService.java b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/WorkerClusterManagerService.java index 37b94b8a..0fb8c224 100644 --- a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/WorkerClusterManagerService.java +++ 
b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/WorkerClusterManagerService.java @@ -18,8 +18,10 @@ import java.util.Set; @Slf4j public class WorkerClusterManagerService { - // 存储Worker健康信息,appId -> ClusterStatusHolder - private static final Map appId2ClusterStatus = Maps.newConcurrentMap(); + /** + * 存储Worker健康信息,appId -> ClusterStatusHolder + */ + private static final Map APP_ID_2_CLUSTER_STATUS = Maps.newConcurrentMap(); /** * 更新状态 @@ -28,7 +30,7 @@ public class WorkerClusterManagerService { public static void updateStatus(WorkerHeartbeat heartbeat) { Long appId = heartbeat.getAppId(); String appName = heartbeat.getAppName(); - ClusterStatusHolder clusterStatusHolder = appId2ClusterStatus.computeIfAbsent(appId, ignore -> new ClusterStatusHolder(appName)); + ClusterStatusHolder clusterStatusHolder = APP_ID_2_CLUSTER_STATUS.computeIfAbsent(appId, ignore -> new ClusterStatusHolder(appName)); clusterStatusHolder.updateStatus(heartbeat); } @@ -38,7 +40,7 @@ public class WorkerClusterManagerService { */ public static void clean(List usingAppIds) { Set keys = Sets.newHashSet(usingAppIds); - appId2ClusterStatus.entrySet().removeIf(entry -> !keys.contains(entry.getKey())); + APP_ID_2_CLUSTER_STATUS.entrySet().removeIf(entry -> !keys.contains(entry.getKey())); } @@ -46,11 +48,11 @@ public class WorkerClusterManagerService { * 清理缓存信息,防止 OOM */ public static void cleanUp() { - appId2ClusterStatus.values().forEach(ClusterStatusHolder::release); + APP_ID_2_CLUSTER_STATUS.values().forEach(ClusterStatusHolder::release); } protected static Map getAppId2ClusterStatus() { - return appId2ClusterStatus; + return APP_ID_2_CLUSTER_STATUS; } } diff --git a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/WorkerClusterQueryService.java b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/WorkerClusterQueryService.java index 04002cb7..2cbef095 100644 --- 
a/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/WorkerClusterQueryService.java +++ b/powerjob-server/powerjob-server-remote/src/main/java/tech/powerjob/server/remote/worker/WorkerClusterQueryService.java @@ -26,9 +26,8 @@ import java.util.Optional; @Service public class WorkerClusterQueryService { - private List workerFilters; + private final List workerFilters; - @Autowired public WorkerClusterQueryService(List workerFilters) { this.workerFilters = workerFilters; } @@ -92,7 +91,6 @@ public class WorkerClusterQueryService { */ public Optional getWorkerInfoByAddress(Long appId, String address) { // this may cause NPE while address value is null . - //return Optional.ofNullable(getWorkerInfosByAppId(appId).get(address)); final Map workerInfosByAppId = getWorkerInfosByAppId(appId); //add null check for both workerInfos Map and address if (null != workerInfosByAppId && null != address) { diff --git a/powerjob-server/powerjob-server-starter/pom.xml b/powerjob-server/powerjob-server-starter/pom.xml index 9bc82db9..8f3f73df 100644 --- a/powerjob-server/powerjob-server-starter/pom.xml +++ b/powerjob-server/powerjob-server-starter/pom.xml @@ -5,7 +5,7 @@ powerjob-server tech.powerjob - 4.2.0 + 4.2.1 ../pom.xml 4.0.0 diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/config/SwaggerConfig.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/config/SwaggerConfig.java index fac926c5..f9c7a2fd 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/config/SwaggerConfig.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/config/SwaggerConfig.java @@ -1,5 +1,6 @@ package tech.powerjob.server.config; +import lombok.RequiredArgsConstructor; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; import 
org.springframework.context.annotation.Configuration; @@ -25,10 +26,10 @@ import static springfox.documentation.builders.PathSelectors.any; @Configuration @EnableSwagger2 @ConditionalOnProperty(name = PowerJobServerConfigKey.SWAGGER_UI_ENABLE, havingValue = "true") +@RequiredArgsConstructor public class SwaggerConfig { - @Resource - private ServerInfoService serverInfoService; + private final ServerInfoService serverInfoService; @Bean public Docket createRestApi() { diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/config/ThreadPoolConfig.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/config/ThreadPoolConfig.java index 899c182e..bd77d50c 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/config/ThreadPoolConfig.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/config/ThreadPoolConfig.java @@ -1,18 +1,17 @@ package tech.powerjob.server.config; -import org.springframework.core.task.AsyncTaskExecutor; -import org.springframework.core.task.TaskExecutor; -import tech.powerjob.server.common.RejectedExecutionHandlerFactory; import lombok.extern.slf4j.Slf4j; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.core.task.AsyncTaskExecutor; +import org.springframework.core.task.TaskExecutor; import org.springframework.scheduling.TaskScheduler; import org.springframework.scheduling.annotation.EnableAsync; import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler; +import tech.powerjob.server.common.RejectedExecutionHandlerFactory; import tech.powerjob.server.common.constants.PJThreadPool; - -import java.util.concurrent.*; +import tech.powerjob.server.common.thread.NewThreadRunRejectedExecutionHandler; /** * 公用线程池配置 @@ -34,7 +33,7 @@ public class 
ThreadPoolConfig { executor.setQueueCapacity(0); executor.setKeepAliveSeconds(60); executor.setThreadNamePrefix("PJ-TIMING-"); - executor.setRejectedExecutionHandler(RejectedExecutionHandlerFactory.newThreadRun(PJThreadPool.TIMING_POOL)); + executor.setRejectedExecutionHandler(new NewThreadRunRejectedExecutionHandler(PJThreadPool.TIMING_POOL)); return executor; } @@ -62,12 +61,14 @@ public class ThreadPoolConfig { return executor; } - // 引入 WebSocket 支持后需要手动初始化调度线程池 + /** + * 引入 WebSocket 支持后需要手动初始化调度线程池 + */ @Bean public TaskScheduler taskScheduler() { ThreadPoolTaskScheduler scheduler = new ThreadPoolTaskScheduler(); - scheduler.setPoolSize(Runtime.getRuntime().availableProcessors()); - scheduler.setThreadNamePrefix("PJ-WS-"); + scheduler.setPoolSize(Math.max(Runtime.getRuntime().availableProcessors() * 8, 32)); + scheduler.setThreadNamePrefix("PJ-DEFAULT-"); scheduler.setDaemon(true); return scheduler; } diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/AppInfoController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/AppInfoController.java index 1e677c32..f50e9a90 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/AppInfoController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/AppInfoController.java @@ -1,5 +1,6 @@ package tech.powerjob.server.web.controller; +import lombok.RequiredArgsConstructor; import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.response.ResultDTO; import tech.powerjob.server.persistence.remote.model.AppInfoDO; @@ -31,12 +32,12 @@ import java.util.stream.Collectors; */ @RestController @RequestMapping("/appInfo") +@RequiredArgsConstructor public class AppInfoController { - @Resource - private AppInfoService appInfoService; - @Resource - private AppInfoRepository appInfoRepository; + private final 
AppInfoService appInfoService; + + private final AppInfoRepository appInfoRepository; private static final int MAX_APP_NUM = 200; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ContainerController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ContainerController.java index f637e5d8..8f13e0ef 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ContainerController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ContainerController.java @@ -41,15 +41,21 @@ import java.util.stream.Collectors; @RequestMapping("/container") public class ContainerController { - @Value("${server.port}") - private int port; - @Resource - private ContainerService containerService; - @Resource - private AppInfoRepository appInfoRepository; - @Resource - private ContainerInfoRepository containerInfoRepository; + private final int port; + + private final ContainerService containerService; + + private final AppInfoRepository appInfoRepository; + + private final ContainerInfoRepository containerInfoRepository; + + public ContainerController(@Value("${server.port}") int port, ContainerService containerService, AppInfoRepository appInfoRepository, ContainerInfoRepository containerInfoRepository) { + this.port = port; + this.containerService = containerService; + this.appInfoRepository = appInfoRepository; + this.containerInfoRepository = containerInfoRepository; + } @GetMapping("/downloadJar") public void downloadJar(String version, HttpServletResponse response) throws IOException { diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/InstanceController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/InstanceController.java index 0300b199..d2f729c2 100644 --- 
a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/InstanceController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/InstanceController.java @@ -22,7 +22,7 @@ import org.springframework.data.domain.Example; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; import org.springframework.data.domain.Sort; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.springframework.web.bind.annotation.*; import javax.annotation.Resource; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/JobController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/JobController.java index 9aa226dc..ab71491c 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/JobController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/JobController.java @@ -1,5 +1,6 @@ package tech.powerjob.server.web.controller; +import org.apache.commons.lang3.StringUtils; import tech.powerjob.common.request.http.SaveJobInfoRequest; import tech.powerjob.common.response.ResultDTO; import tech.powerjob.server.common.constants.SwitchableStatus; @@ -14,7 +15,6 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; import org.springframework.data.domain.Sort; -import org.springframework.util.StringUtils; import org.springframework.web.bind.annotation.*; import javax.annotation.Resource; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/OpenAPIController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/OpenAPIController.java index cd181695..0db6a9e3 100644 --- 
a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/OpenAPIController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/OpenAPIController.java @@ -1,8 +1,10 @@ package tech.powerjob.server.web.controller; -import tech.powerjob.common.enums.InstanceStatus; +import lombok.RequiredArgsConstructor; +import org.springframework.web.bind.annotation.*; import tech.powerjob.common.OpenAPIConstant; import tech.powerjob.common.PowerQuery; +import tech.powerjob.common.enums.InstanceStatus; import tech.powerjob.common.request.http.SaveJobInfoRequest; import tech.powerjob.common.request.http.SaveWorkflowNodeRequest; import tech.powerjob.common.request.http.SaveWorkflowRequest; @@ -11,19 +13,16 @@ import tech.powerjob.common.response.InstanceInfoDTO; import tech.powerjob.common.response.JobInfoDTO; import tech.powerjob.common.response.ResultDTO; import tech.powerjob.common.response.WorkflowInstanceInfoDTO; -import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO; -import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO; +import tech.powerjob.server.core.instance.InstanceService; import tech.powerjob.server.core.service.AppInfoService; import tech.powerjob.server.core.service.CacheService; import tech.powerjob.server.core.service.JobService; -import tech.powerjob.server.core.instance.InstanceService; import tech.powerjob.server.core.workflow.WorkflowInstanceService; import tech.powerjob.server.core.workflow.WorkflowService; +import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO; +import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO; import tech.powerjob.server.web.response.WorkflowInfoVO; -import org.springframework.web.bind.annotation.*; -import javax.annotation.Resource; -import java.text.ParseException; import java.util.List; /** @@ -34,21 +33,20 @@ import java.util.List; */ @RestController 
@RequestMapping(OpenAPIConstant.WEB_PATH) +@RequiredArgsConstructor public class OpenAPIController { - @Resource - private AppInfoService appInfoService; - @Resource - private JobService jobService; - @Resource - private InstanceService instanceService; - @Resource - private WorkflowService workflowService; - @Resource - private WorkflowInstanceService workflowInstanceService; + private final AppInfoService appInfoService; - @Resource - private CacheService cacheService; + private final JobService jobService; + + private final InstanceService instanceService; + + private final WorkflowService workflowService; + + private final WorkflowInstanceService workflowInstanceService; + + private final CacheService cacheService; @PostMapping(OpenAPIConstant.ASSERT) @@ -59,7 +57,7 @@ public class OpenAPIController { /* ************* Job 区 ************* */ @PostMapping(OpenAPIConstant.SAVE_JOB) - public ResultDTO saveJob(@RequestBody SaveJobInfoRequest request) throws ParseException { + public ResultDTO saveJob(@RequestBody SaveJobInfoRequest request) { if (request.getId() != null) { checkJobIdValid(request.getId(), request.getAppId()); } @@ -102,7 +100,7 @@ public class OpenAPIController { } @PostMapping(OpenAPIConstant.ENABLE_JOB) - public ResultDTO enableJob(Long jobId, Long appId) throws ParseException { + public ResultDTO enableJob(Long jobId, Long appId) { checkJobIdValid(jobId, appId); jobService.enableJob(jobId); return ResultDTO.success(null); @@ -156,7 +154,7 @@ public class OpenAPIController { /* ************* Workflow 区 ************* */ @PostMapping(OpenAPIConstant.SAVE_WORKFLOW) - public ResultDTO saveWorkflow(@RequestBody SaveWorkflowRequest request) throws ParseException { + public ResultDTO saveWorkflow(@RequestBody SaveWorkflowRequest request) { return ResultDTO.success(workflowService.saveWorkflow(request)); } diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ServerController.java 
b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ServerController.java index 7f60fb05..ada346e3 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ServerController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ServerController.java @@ -2,6 +2,11 @@ package tech.powerjob.server.web.controller; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; +import lombok.RequiredArgsConstructor; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; import tech.powerjob.common.response.ResultDTO; import tech.powerjob.common.utils.CommonUtils; import tech.powerjob.common.utils.NetUtils; @@ -10,12 +15,7 @@ import tech.powerjob.server.persistence.remote.repository.AppInfoRepository; import tech.powerjob.server.remote.server.election.ServerElectionService; import tech.powerjob.server.remote.transport.TransportService; import tech.powerjob.server.remote.worker.WorkerClusterQueryService; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; -import javax.annotation.Resource; import java.util.Optional; import java.util.TimeZone; @@ -28,16 +28,16 @@ import java.util.TimeZone; */ @RestController @RequestMapping("/server") +@RequiredArgsConstructor public class ServerController { - @Resource - private TransportService transportService; - @Resource - private ServerElectionService serverElectionService; - @Resource - private AppInfoRepository appInfoRepository; - @Resource - private WorkerClusterQueryService workerClusterQueryService; + 
private final TransportService transportService; + + private final ServerElectionService serverElectionService; + + private final AppInfoRepository appInfoRepository; + + private final WorkerClusterQueryService workerClusterQueryService; @GetMapping("/assert") public ResultDTO assertAppName(String appName) { diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/SystemInfoController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/SystemInfoController.java index 9dda6ea2..5864fecc 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/SystemInfoController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/SystemInfoController.java @@ -1,25 +1,24 @@ package tech.powerjob.server.web.controller; -import tech.powerjob.common.enums.InstanceStatus; -import tech.powerjob.common.OmsConstant; -import tech.powerjob.common.response.ResultDTO; -import tech.powerjob.server.common.constants.SwitchableStatus; -import tech.powerjob.server.common.module.ServerInfo; -import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository; -import tech.powerjob.server.persistence.remote.repository.JobInfoRepository; -import tech.powerjob.server.remote.server.self.ServerInfoService; -import tech.powerjob.server.remote.worker.WorkerClusterQueryService; -import tech.powerjob.server.common.module.WorkerInfo; -import tech.powerjob.server.web.response.SystemOverviewVO; -import tech.powerjob.server.web.response.WorkerStatusVO; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.time.DateFormatUtils; import org.apache.commons.lang3.time.DateUtils; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; +import 
tech.powerjob.common.OmsConstant; +import tech.powerjob.common.enums.InstanceStatus; +import tech.powerjob.common.response.ResultDTO; +import tech.powerjob.server.common.constants.SwitchableStatus; +import tech.powerjob.server.common.module.WorkerInfo; +import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository; +import tech.powerjob.server.persistence.remote.repository.JobInfoRepository; +import tech.powerjob.server.remote.server.self.ServerInfoService; +import tech.powerjob.server.remote.worker.WorkerClusterQueryService; +import tech.powerjob.server.web.response.SystemOverviewVO; +import tech.powerjob.server.web.response.WorkerStatusVO; -import javax.annotation.Resource; import java.util.Date; import java.util.List; import java.util.TimeZone; @@ -34,17 +33,16 @@ import java.util.stream.Collectors; @Slf4j @RestController @RequestMapping("/system") +@RequiredArgsConstructor public class SystemInfoController { - @Resource - private JobInfoRepository jobInfoRepository; - @Resource - private InstanceInfoRepository instanceInfoRepository; + private final JobInfoRepository jobInfoRepository; - @Resource - private ServerInfoService serverInfoService; - @Resource - private WorkerClusterQueryService workerClusterQueryService; + private final InstanceInfoRepository instanceInfoRepository; + + private final ServerInfoService serverInfoService; + + private final WorkerClusterQueryService workerClusterQueryService; @GetMapping("/listWorker") public ResultDTO> listWorker(Long appId) { diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/UserInfoController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/UserInfoController.java index c0aebbdb..029c653f 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/UserInfoController.java +++ 
b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/UserInfoController.java @@ -11,7 +11,7 @@ import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NoArgsConstructor; import org.springframework.util.CollectionUtils; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.springframework.web.bind.annotation.*; import javax.annotation.Resource; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ValidateController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ValidateController.java index 4b0e2c2a..5b07bb24 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ValidateController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/ValidateController.java @@ -1,6 +1,7 @@ package tech.powerjob.server.web.controller; import com.google.common.collect.Lists; +import lombok.RequiredArgsConstructor; import org.apache.commons.lang3.exception.ExceptionUtils; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RequestMapping; @@ -10,7 +11,6 @@ import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.response.ResultDTO; import tech.powerjob.server.core.scheduler.TimingStrategyService; -import javax.annotation.Resource; import java.util.List; /** @@ -22,10 +22,10 @@ import java.util.List; */ @RestController @RequestMapping("/validate") +@RequiredArgsConstructor public class ValidateController { - @Resource - private TimingStrategyService timingStrategyService; + private final TimingStrategyService timingStrategyService; @GetMapping("/timeExpression") public ResultDTO> checkTimeExpression(TimeExpressionType timeExpressionType, diff --git 
a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/WorkflowController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/WorkflowController.java index 9b2b8e87..c78c6965 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/WorkflowController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/WorkflowController.java @@ -14,7 +14,7 @@ import tech.powerjob.server.web.response.WorkflowInfoVO; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; import org.springframework.data.domain.Sort; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.springframework.web.bind.annotation.*; import javax.annotation.Resource; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/WorkflowInstanceController.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/WorkflowInstanceController.java index bcd5a825..944737dd 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/WorkflowInstanceController.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/controller/WorkflowInstanceController.java @@ -14,7 +14,7 @@ import org.springframework.data.domain.Example; import org.springframework.data.domain.Page; import org.springframework.data.domain.PageRequest; import org.springframework.data.domain.Sort; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.springframework.web.bind.annotation.*; import javax.annotation.Resource; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/GenerateContainerTemplateRequest.java 
b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/GenerateContainerTemplateRequest.java index af8e5085..0ce75280 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/GenerateContainerTemplateRequest.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/GenerateContainerTemplateRequest.java @@ -11,15 +11,25 @@ import lombok.Data; @Data public class GenerateContainerTemplateRequest { - // Maven Group + /** + * Maven Group + */ private String group; - // Maven artifact + /** + * Maven artifact + */ private String artifact; - // Maven name + /** + * Maven name + */ private String name; - // 包名(com.xx.xx.xx) + /** + * 包名(com.xx.xx.xx) + */ private String packageName; - // Java版本号,8或者11 + /** + * Java版本号,8或者11 + */ private Integer javaVersion; } diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/ModifyUserInfoRequest.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/ModifyUserInfoRequest.java index 486c5617..db43e346 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/ModifyUserInfoRequest.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/ModifyUserInfoRequest.java @@ -17,8 +17,12 @@ public class ModifyUserInfoRequest { private String password; private String webHook; - // 手机号 + /** + * 手机号 + */ private String phone; - // 邮箱地址 + /** + * 邮箱地址 + */ private String email; } diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryInstanceRequest.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryInstanceRequest.java index cf80f1db..f8e3e9c5 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryInstanceRequest.java +++ 
b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryInstanceRequest.java @@ -1,7 +1,7 @@ package tech.powerjob.server.web.request; -import tech.powerjob.server.common.constants.InstanceType; import lombok.Data; +import tech.powerjob.server.common.constants.InstanceType; /** * 任务实例查询对象 @@ -12,14 +12,21 @@ import lombok.Data; @Data public class QueryInstanceRequest { - // 任务所属应用ID + /** + * 任务所属应用ID + */ private Long appId; - // 当前页码 + /** + * 当前页码 + */ private Integer index; - // 页大小 + /** + * 页大小 + */ private Integer pageSize; - - // 查询条件(NORMAL/WORKFLOW) + /** + * 查询条件(NORMAL/WORKFLOW) + */ private InstanceType type; private Long instanceId; private Long jobId; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryJobInfoRequest.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryJobInfoRequest.java index 10fd568c..9f615265 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryJobInfoRequest.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryJobInfoRequest.java @@ -11,14 +11,21 @@ import lombok.Data; @Data public class QueryJobInfoRequest { - // 任务所属应用ID + /** + * 任务所属应用ID + */ private Long appId; - // 当前页码 + /** + * 当前页码 + */ private Integer index; - // 页大小 + /** + * 页大小 + */ private Integer pageSize; - - // 查询条件 + /** + * 任务ID + */ private Long jobId; private String keyword; } diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryWorkflowInfoRequest.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryWorkflowInfoRequest.java index 4c6b96b1..6e89862a 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryWorkflowInfoRequest.java +++ 
b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryWorkflowInfoRequest.java @@ -11,14 +11,21 @@ import lombok.Data; @Data public class QueryWorkflowInfoRequest { - // 任务所属应用ID + /** + * 任务所属应用ID + */ private Long appId; - // 当前页码 + /** + * 当前页码 + */ private Integer index; - // 页大小 + /** + * 页大小 + */ private Integer pageSize; - - // 查询条件 + /** + * 查询条件 + */ private Long workflowId; private String keyword; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryWorkflowInstanceRequest.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryWorkflowInstanceRequest.java index 9eb5c1a5..39343506 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryWorkflowInstanceRequest.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/QueryWorkflowInstanceRequest.java @@ -11,15 +11,23 @@ import lombok.Data; @Data public class QueryWorkflowInstanceRequest { - // 任务所属应用ID + /** + * 任务所属应用ID + */ private Long appId; - // 当前页码 + /** + * 当前页码 + */ private Integer index; - // 页大小 + /** + * 页大小 + */ private Integer pageSize; - - // 查询条件(NORMAL/WORKFLOW) + /** + * 查询条件(NORMAL/WORKFLOW) + */ private Long wfInstanceId; + private Long workflowId; private String status; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/SaveContainerInfoRequest.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/SaveContainerInfoRequest.java index 69c0664b..aa734c14 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/SaveContainerInfoRequest.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/request/SaveContainerInfoRequest.java @@ -14,21 +14,33 @@ import lombok.Data; @Data public class 
SaveContainerInfoRequest { - // 容器ID,null -> 创建;否则代表修改 + /** + * 容器ID,null -> 创建;否则代表修改 + */ private Long id; - // 所属的应用ID + /** + * 所属的应用ID + */ private Long appId; - // 容器名称 + /** + * 容器名称 + */ private String containerName; - // 容器类型,枚举值为 ContainerSourceType(JarFile/Git) + /** + * 容器类型,枚举值为 ContainerSourceType(JarFile/Git) + */ private ContainerSourceType sourceType; - // 由 sourceType 决定,JarFile -> String,存储文件名称;Git -> JSON,包括 URL,branch,username,password + /** + * 由 sourceType 决定,JarFile -> String,存储文件名称;Git -> JSON,包括 URL,branch,username,password + */ private String sourceInfo; - // 状态,枚举值为 ContainerStatus(ENABLE/DISABLE) + /** + * 状态,枚举值为 ContainerStatus(ENABLE/DISABLE) + */ private SwitchableStatus status; public void valid() { diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/ContainerInfoVO.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/ContainerInfoVO.java index dfc7d4ec..c1b2ce63 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/ContainerInfoVO.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/ContainerInfoVO.java @@ -17,18 +17,25 @@ public class ContainerInfoVO { private String containerName; - // 容器类型,枚举值为 ContainerSourceType + /** + * 容器类型,枚举值为 ContainerSourceType + */ private String sourceType; - // 由 sourceType 决定,JarFile -> String,存储文件名称;Git -> JSON,包括 URL,branch,username,password + /** + * 由 sourceType 决定,JarFile -> String,存储文件名称;Git -> JSON,包括 URL,branch,username,password + */ private String sourceInfo; - - // 版本 (Jar包使用md5,Git使用commitId,前者32位,后者40位,不会产生碰撞) + /** + * 版本 (Jar包使用md5,Git使用commitId,前者32位,后者40位,不会产生碰撞) + */ private String version; - - // 状态,枚举值为 ContainerStatus + /** + * 状态,枚举值为 ContainerStatus + */ private String status; - - // 上一次部署时间 + /** + * 上一次部署时间 + */ private String lastDeployTime; private Date gmtCreate; diff --git 
a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/InstanceInfoVO.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/InstanceInfoVO.java index 67fa351f..399e627b 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/InstanceInfoVO.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/InstanceInfoVO.java @@ -15,29 +15,44 @@ import org.springframework.beans.BeanUtils; @Data public class InstanceInfoVO { - // 任务ID(JS精度丢失) + /** + * 任务ID(JS精度丢失) + */ private String jobId; - // 任务名称 + /** + * 任务名称 + */ private String jobName; - // 任务实例ID(JS精度丢失) + /** + * 任务实例ID(JS精度丢失) + */ private String instanceId; - // 该任务实例所属的 workflow ID,仅 workflow 任务存在 + /** + * 该任务实例所属的 workflow ID,仅 workflow 任务存在 + */ private String wfInstanceId; - - // 执行结果 + /** + * 执行结果 + */ private String result; - - // TaskTracker地址 + /** + * TaskTracker地址 + */ private String taskTrackerAddress; - - // 总共执行的次数(用于重试判断) + /** + * 总共执行的次数(用于重试判断) + */ private Long runningTimes; private int status; /* ********** 不一致区域 ********** */ - // 实际触发时间(需要格式化为人看得懂的时间) + /** + * 实际触发时间(需要格式化为人看得懂的时间) + */ private String actualTriggerTime; - // 结束时间(同理,需要格式化) + /** + * 结束时间(同理,需要格式化) + */ private String finishedTime; public static InstanceInfoVO from(InstanceInfoDO instanceInfoDo, String jobName) { diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/SystemOverviewVO.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/SystemOverviewVO.java index c7b94500..0f4f9526 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/SystemOverviewVO.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/SystemOverviewVO.java @@ -17,9 +17,13 @@ public class SystemOverviewVO { private long 
jobCount; private long runningInstanceCount; private long failedInstanceCount; - // 服务器时区 + /** + * 服务器时区 + */ private String timezone; - // 服务器时间 + /** + * 服务器时间 + */ private String serverTime; private ServerInfo serverInfo; diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/WorkerStatusVO.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/WorkerStatusVO.java index 32bd6ee0..a8a6b101 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/WorkerStatusVO.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/WorkerStatusVO.java @@ -1,10 +1,10 @@ package tech.powerjob.server.web.response; +import lombok.Data; +import lombok.NoArgsConstructor; import tech.powerjob.common.model.SystemMetrics; import tech.powerjob.common.utils.CommonUtils; import tech.powerjob.server.common.module.WorkerInfo; -import lombok.Data; -import lombok.NoArgsConstructor; import java.text.DecimalFormat; @@ -27,12 +27,26 @@ public class WorkerStatusVO { private String tag; private String lastActiveTime; - // 1 -> 健康,绿色,2 -> 一般,橙色,3 -> 糟糕,红色,9999 -> 非在线机器 + private Integer lightTaskTrackerNum; + + private Integer heavyTaskTrackerNum; + + private long lastOverloadTime; + + private boolean overloading; + + /** + * 1 -> 健康,绿色,2 -> 一般,橙色,3 -> 糟糕,红色,9999 -> 非在线机器 + */ private int status; - // 12.3%(4 cores) + /** + * 12.3%(4 cores) + */ private static final String CPU_FORMAT = "%s / %s cores"; - // 27.7%(2.9/8.0 GB) + /** + * 27.7%(2.9/8.0 GB) + */ private static final String OTHER_FORMAT = "%s%%(%s / %s GB)"; private static final DecimalFormat df = new DecimalFormat("#.#"); @@ -65,6 +79,11 @@ public class WorkerStatusVO { this.status ++; } + if (workerInfo.overload()){ + // 超载的情况直接置为 3 + this.status = 3; + } + if (workerInfo.timeout()) { this.status = 9999; } @@ -72,5 +91,9 @@ public class WorkerStatusVO { this.protocol 
= workerInfo.getProtocol(); this.tag = CommonUtils.formatString(workerInfo.getTag()); this.lastActiveTime = CommonUtils.formatTime(workerInfo.getLastActiveTime()); + this.lightTaskTrackerNum = workerInfo.getLightTaskTrackerNum(); + this.heavyTaskTrackerNum = workerInfo.getHeavyTaskTrackerNum(); + this.lastOverloadTime = workerInfo.getLastOverloadTime(); + this.overloading = workerInfo.isOverloading(); } } diff --git a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/WorkflowInfoVO.java b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/WorkflowInfoVO.java index e258aea2..bfdcd92c 100644 --- a/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/WorkflowInfoVO.java +++ b/powerjob-server/powerjob-server-starter/src/main/java/tech/powerjob/server/web/response/WorkflowInfoVO.java @@ -1,6 +1,7 @@ package tech.powerjob.server.web.response; import com.alibaba.fastjson.JSON; +import org.apache.commons.lang3.StringUtils; import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.model.LifeCycle; import tech.powerjob.common.model.PEWorkflowDAG; @@ -9,7 +10,6 @@ import tech.powerjob.server.common.constants.SwitchableStatus; import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO; import lombok.Data; import org.springframework.beans.BeanUtils; -import org.springframework.util.StringUtils; import java.util.Date; import java.util.List; diff --git a/powerjob-server/powerjob-server-starter/src/main/resources/application-daily.properties b/powerjob-server/powerjob-server-starter/src/main/resources/application-daily.properties index 4f8efca7..693186e6 100644 --- a/powerjob-server/powerjob-server-starter/src/main/resources/application-daily.properties +++ b/powerjob-server/powerjob-server-starter/src/main/resources/application-daily.properties @@ -10,9 +10,9 @@ spring.datasource.core.maximum-pool-size=20 
spring.datasource.core.minimum-idle=5 ####### MongoDB properties(Non-core configuration properties) ####### -####### configure oms.mongodb.enable=false to disable mongodb ####### -oms.mongodb.enable=true -spring.data.mongodb.uri=mongodb+srv://zqq:No1Bug2Please3!@cluster0.wie54.gcp.mongodb.net/powerjob_daily?retryWrites=true&w=majority +####### delete mongodb config to disable mongodb ####### +#oms.mongodb.enable=true +#spring.data.mongodb.uri=mongodb+srv://zqq:No1Bug2Please3!@cluster0.wie54.gcp.mongodb.net/powerjob_daily?retryWrites=true&w=majority ####### Email properties(Non-core configuration properties) ####### ####### Delete the following code to disable the mail ####### diff --git a/powerjob-server/powerjob-server-starter/src/main/resources/application-pre.properties b/powerjob-server/powerjob-server-starter/src/main/resources/application-pre.properties index 236a0dc4..f08f673b 100644 --- a/powerjob-server/powerjob-server-starter/src/main/resources/application-pre.properties +++ b/powerjob-server/powerjob-server-starter/src/main/resources/application-pre.properties @@ -10,7 +10,7 @@ spring.datasource.core.maximum-pool-size=20 spring.datasource.core.minimum-idle=5 ####### MongoDB properties(Non-core configuration properties) ####### -####### configure oms.mongodb.enable=false to disable mongodb ####### +####### delete mongodb config to disable mongodb ####### oms.mongodb.enable=true spring.data.mongodb.uri=mongodb://remotehost:27017/powerjob-pre diff --git a/powerjob-server/powerjob-server-starter/src/main/resources/application-product.properties b/powerjob-server/powerjob-server-starter/src/main/resources/application-product.properties index 5c41c152..a3ed4fe1 100644 --- a/powerjob-server/powerjob-server-starter/src/main/resources/application-product.properties +++ b/powerjob-server/powerjob-server-starter/src/main/resources/application-product.properties @@ -10,7 +10,7 @@ spring.datasource.core.maximum-pool-size=20 spring.datasource.core.minimum-idle=5 
####### MongoDB properties(Non-core configuration properties) ####### -####### configure oms.mongodb.enable=false to disable mongodb ####### +####### delete mongodb config to disable mongodb ####### oms.mongodb.enable=true spring.data.mongodb.uri=mongodb://localhost:27017/powerjob-product diff --git a/powerjob-server/powerjob-server-starter/src/test/java/tech/powerjob/server/test/RepositoryTest.java b/powerjob-server/powerjob-server-starter/src/test/java/tech/powerjob/server/test/RepositoryTest.java index 74566dcc..0ea80988 100644 --- a/powerjob-server/powerjob-server-starter/src/test/java/tech/powerjob/server/test/RepositoryTest.java +++ b/powerjob-server/powerjob-server-starter/src/test/java/tech/powerjob/server/test/RepositoryTest.java @@ -1,7 +1,12 @@ package tech.powerjob.server.test; +import org.assertj.core.util.Lists; import org.junit.jupiter.api.Test; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.PageRequest; import org.springframework.test.annotation.Rollback; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.transaction.annotation.Transactional; import tech.powerjob.common.enums.InstanceStatus; import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.enums.WorkflowInstanceStatus; @@ -10,14 +15,11 @@ import tech.powerjob.server.common.constants.SwitchableStatus; import tech.powerjob.server.persistence.remote.model.InstanceInfoDO; import tech.powerjob.server.persistence.remote.model.JobInfoDO; import tech.powerjob.server.persistence.remote.model.OmsLockDO; +import tech.powerjob.server.persistence.remote.model.brief.BriefInstanceInfo; import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository; import tech.powerjob.server.persistence.remote.repository.JobInfoRepository; import tech.powerjob.server.persistence.remote.repository.OmsLockRepository; import 
tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository; -import org.assertj.core.util.Lists; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.test.context.junit4.SpringRunner; -import org.springframework.transaction.annotation.Transactional; import javax.annotation.Resource; import java.util.Date; @@ -93,7 +95,7 @@ public class RepositoryTest { public void testCheckQuery() { Date time = new Date(); System.out.println(time); - final List res = instanceInfoRepository.findByAppIdInAndStatusAndGmtModifiedBefore(Lists.newArrayList(1L), 3, time); + final List res = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndGmtModifiedBefore(Lists.newArrayList(1L), 3, time, PageRequest.of(0, 100)); System.out.println(res); } diff --git a/powerjob-worker-agent/pom.xml b/powerjob-worker-agent/pom.xml index 79b88976..7449eabc 100644 --- a/powerjob-worker-agent/pom.xml +++ b/powerjob-worker-agent/pom.xml @@ -10,12 +10,12 @@ 4.0.0 powerjob-worker-agent - 4.2.0 + 4.2.1 jar - 4.2.0 + 4.2.1 1.2.9 4.3.2 diff --git a/powerjob-worker-samples/pom.xml b/powerjob-worker-samples/pom.xml index 62b2c058..091c275e 100644 --- a/powerjob-worker-samples/pom.xml +++ b/powerjob-worker-samples/pom.xml @@ -10,11 +10,11 @@ 4.0.0 powerjob-worker-samples - 4.2.0 + 4.2.1 2.7.4 - 4.2.0 + 4.2.1 1.2.83 1.2.1 diff --git a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/BroadcastProcessorDemo.java b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/BroadcastProcessorDemo.java index 72bb72da..7515cb51 100644 --- a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/BroadcastProcessorDemo.java +++ b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/BroadcastProcessorDemo.java @@ -22,12 +22,12 @@ import java.util.List; public class BroadcastProcessorDemo implements BroadcastProcessor { @Override - public ProcessResult preProcess(TaskContext context) throws 
Exception { + public ProcessResult preProcess(TaskContext context) { System.out.println("===== BroadcastProcessorDemo#preProcess ======"); context.getOmsLogger().info("BroadcastProcessorDemo#preProcess, current host: {}", NetUtils.getLocalHost()); if ("rootFailed".equals(context.getJobParams())) { return new ProcessResult(false, "console need failed"); - }else { + } else { return new ProcessResult(true); } } @@ -40,7 +40,7 @@ public class BroadcastProcessorDemo implements BroadcastProcessor { long sleepTime = 1000; try { sleepTime = Long.parseLong(taskContext.getJobParams()); - }catch (Exception e) { + } catch (Exception e) { logger.warn("[BroadcastProcessor] parse sleep time failed!", e); } Thread.sleep(Math.max(sleepTime, 1000)); @@ -48,7 +48,7 @@ public class BroadcastProcessorDemo implements BroadcastProcessor { } @Override - public ProcessResult postProcess(TaskContext context, List taskResults) throws Exception { + public ProcessResult postProcess(TaskContext context, List taskResults) { System.out.println("===== BroadcastProcessorDemo#postProcess ======"); context.getOmsLogger().info("BroadcastProcessorDemo#postProcess, current host: {}, taskResult: {}", NetUtils.getLocalHost(), taskResults); return new ProcessResult(true, "success"); diff --git a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/MapProcessorDemo.java b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/MapProcessorDemo.java index e089532d..e9f03276 100644 --- a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/MapProcessorDemo.java +++ b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/MapProcessorDemo.java @@ -39,13 +39,13 @@ public class MapProcessorDemo implements MapProcessor { @Override public ProcessResult process(TaskContext context) throws Exception { - System.out.println("============== MapProcessorDemo#process =============="); - System.out.println("isRootTask:" + isRootTask()); - 
System.out.println("taskContext:" + JsonUtils.toJSONString(context)); - System.out.println(mysteryService.hasaki()); + log.info("============== MapProcessorDemo#process =============="); + log.info("isRootTask:{}", isRootTask()); + log.info("taskContext:{}", JsonUtils.toJSONString(context)); + log.info("{}", mysteryService.hasaki()); if (isRootTask()) { - System.out.println("==== MAP ===="); + log.info("==== MAP ===="); List subTasks = Lists.newLinkedList(); for (int j = 0; j < BATCH_NUM; j++) { SubTask subTask = new SubTask(); @@ -60,16 +60,16 @@ public class MapProcessorDemo implements MapProcessor { return new ProcessResult(true, "map successfully"); } else { - System.out.println("==== PROCESS ===="); + log.info("==== PROCESS ===="); SubTask subTask = (SubTask) context.getSubTask(); for (Integer itemId : subTask.getItemIds()) { if (Thread.interrupted()) { // 任务被中断 - System.out.println("job has been stop! so stop to process subTask:" + subTask.getSiteId() + "=>" + itemId); + log.info("job has been stop! 
so stop to process subTask: {} => {}", subTask.getSiteId(), itemId); break; } - System.out.println("processing subTask: " + subTask.getSiteId() + "=>" + itemId); - int max = Integer.MAX_VALUE >> 4; + log.info("processing subTask: {} => {}", subTask.getSiteId(), itemId); + int max = Integer.MAX_VALUE >> 7; for (int i = 0; ; i++) { // 模拟耗时操作 if (i > max) { @@ -80,6 +80,10 @@ public class MapProcessorDemo implements MapProcessor { // 测试在 Map 任务中追加上下文 context.getWorkflowContext().appendData2WfContext("Yasuo", "A sword's poor company for a long road."); boolean b = ThreadLocalRandom.current().nextBoolean(); + if (context.getCurrentRetryTimes() >= 1) { + // 重试的话一定会成功 + b = true; + } return new ProcessResult(b, "RESULT:" + b); } } diff --git a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/MapReduceProcessorDemo.java b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/MapReduceProcessorDemo.java index b761a770..215a95a0 100644 --- a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/MapReduceProcessorDemo.java +++ b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/MapReduceProcessorDemo.java @@ -35,9 +35,9 @@ public class MapReduceProcessorDemo implements MapReduceProcessor { OmsLogger omsLogger = context.getOmsLogger(); - System.out.println("============== TestMapReduceProcessor#process =============="); - System.out.println("isRootTask:" + isRootTask()); - System.out.println("taskContext:" + JsonUtils.toJSONString(context)); + log.info("============== TestMapReduceProcessor#process =============="); + log.info("isRootTask:{}", isRootTask()); + log.info("taskContext:{}", JsonUtils.toJSONString(context)); // 根据控制台参数获取MR批次及子任务大小 final JSONObject jobParams = JSONObject.parseObject(context.getJobParams()); @@ -46,7 +46,7 @@ public class MapReduceProcessorDemo implements MapReduceProcessor { Integer batchNum = (Integer) jobParams.getOrDefault("batchNum", 10); if (isRootTask()) { - 
System.out.println("==== MAP ===="); + log.info("==== MAP ===="); omsLogger.info("[DemoMRProcessor] start root task~"); List subTasks = Lists.newLinkedList(); for (int j = 0; j < batchNum; j++) { @@ -59,14 +59,14 @@ public class MapReduceProcessorDemo implements MapReduceProcessor { } omsLogger.info("[DemoMRProcessor] map success~"); return new ProcessResult(true, "MAP_SUCCESS"); - }else { - System.out.println("==== NORMAL_PROCESS ===="); + } else { + log.info("==== NORMAL_PROCESS ===="); omsLogger.info("[DemoMRProcessor] process subTask: {}.", JSON.toJSONString(context.getSubTask())); - System.out.println("subTask: " + JsonUtils.toJSONString(context.getSubTask())); + log.info("subTask: {}", JsonUtils.toJSONString(context.getSubTask())); Thread.sleep(1000); if (context.getCurrentRetryTimes() == 0) { return new ProcessResult(false, "FIRST_FAILED"); - }else { + } else { return new ProcessResult(true, "PROCESS_SUCCESS"); } } diff --git a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/StandaloneProcessorDemo.java b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/StandaloneProcessorDemo.java index 06a96a90..d039c73d 100644 --- a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/StandaloneProcessorDemo.java +++ b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/StandaloneProcessorDemo.java @@ -1,5 +1,6 @@ package tech.powerjob.samples.processors; +import org.apache.commons.lang3.StringUtils; import tech.powerjob.worker.core.processor.ProcessResult; import tech.powerjob.worker.core.processor.TaskContext; import tech.powerjob.worker.core.processor.sdk.BasicProcessor; @@ -21,25 +22,29 @@ public class StandaloneProcessorDemo implements BasicProcessor { @Override public ProcessResult process(TaskContext context) throws Exception { - OmsLogger omsLogger = context.getOmsLogger(); omsLogger.info("StandaloneProcessorDemo start process,context is {}.", context); omsLogger.info("Notice! 
If you want this job process failed, your jobParams need to be 'failed'"); - omsLogger.info("Let's test the exception~"); // 测试异常日志 try { Collections.emptyList().add("277"); - }catch (Exception e) { + } catch (Exception e) { omsLogger.error("oh~it seems that we have an exception~", e); } - - System.out.println("================ StandaloneProcessorDemo#process ================"); - System.out.println(context.getJobParams()); - // 根据控制台参数判断是否成功 - boolean success = !"failed".equals(context.getJobParams()); - omsLogger.info("StandaloneProcessorDemo finished process,success: .", success); - + log.info("================ StandaloneProcessorDemo#process ================"); + log.info("jobParam:{}", context.getJobParams()); + log.info("instanceParams:{}", context.getInstanceParams()); + String param; + // 解析参数,非处于工作流中时,优先取实例参数(允许动态[instanceParams]覆盖静态参数[jobParams]) + if (context.getWorkflowContext() == null) { + param = StringUtils.isBlank(context.getInstanceParams()) ? context.getJobParams() : context.getInstanceParams(); + } else { + param = context.getJobParams(); + } + // 根据参数判断是否成功 + boolean success = !"failed".equals(param); + omsLogger.info("StandaloneProcessorDemo finished process,success: {}", success); omsLogger.info("anyway, we finished the job successfully~Congratulations!"); return new ProcessResult(success, context + ": " + success); } diff --git a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/TimeoutProcessor.java b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/TimeoutProcessor.java index 85c1a208..e630ae8b 100644 --- a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/TimeoutProcessor.java +++ b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/processors/TimeoutProcessor.java @@ -1,20 +1,24 @@ package tech.powerjob.samples.processors; +import lombok.extern.slf4j.Slf4j; import tech.powerjob.worker.core.processor.ProcessResult; import 
tech.powerjob.worker.core.processor.TaskContext; import tech.powerjob.worker.core.processor.sdk.BasicProcessor; import org.springframework.stereotype.Component; /** - * 测试超时任务 + * 测试超时任务(可中断) * * @author tjq * @since 2020/4/20 */ @Component +@Slf4j public class TimeoutProcessor implements BasicProcessor { @Override public ProcessResult process(TaskContext context) throws Exception { + long sleepTime = Long.parseLong(context.getJobParams()); + log.info("TaskInstance({}) will sleep {} ms", context.getInstanceId(), sleepTime); Thread.sleep(Long.parseLong(context.getJobParams())); return new ProcessResult(true, "impossible~~~~QAQ~"); } diff --git a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/AppendWorkflowContextTester.java b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/AppendWorkflowContextTester.java index f735b400..05133263 100644 --- a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/AppendWorkflowContextTester.java +++ b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/AppendWorkflowContextTester.java @@ -19,7 +19,6 @@ public class AppendWorkflowContextTester implements BasicProcessor { private static final String FAIL_CODE = "0"; - @Override @SuppressWarnings("squid:S106") public ProcessResult process(TaskContext context) throws Exception { diff --git a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/StopInstanceTester.java b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/StopInstanceTester.java index 58819b89..50fbdbdd 100644 --- a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/StopInstanceTester.java +++ b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/StopInstanceTester.java @@ -6,20 +6,22 @@ import tech.powerjob.worker.core.processor.sdk.BasicProcessor; import org.springframework.stereotype.Component; /** - * 测试用户反馈的无法停止实例的问题 + * 测试用户反馈的无法停止实例的问题 (可中断) * https://github.com/PowerJob/PowerJob/issues/37 * 
* @author tjq * @since 2020/7/30 */ @Component +@SuppressWarnings("all") public class StopInstanceTester implements BasicProcessor { @Override public ProcessResult process(TaskContext context) throws Exception { int i = 0; while (true) { System.out.println(i++); - Thread.sleep(1000*10); + // interruptable + Thread.sleep(10000L); } } } diff --git a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/StopInstanceUninterruptibleTester.java b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/StopInstanceUninterruptibleTester.java new file mode 100644 index 00000000..a298afb0 --- /dev/null +++ b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/tester/StopInstanceUninterruptibleTester.java @@ -0,0 +1,30 @@ +package tech.powerjob.samples.tester; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Component; +import tech.powerjob.worker.core.processor.ProcessResult; +import tech.powerjob.worker.core.processor.TaskContext; +import tech.powerjob.worker.core.processor.sdk.BasicProcessor; + +/** + * 停止实例 (不可中断) + * + * @author Echo009 + * @since 2023/1/15 + */ +@Component +@Slf4j +@SuppressWarnings("all") +public class StopInstanceUninterruptibleTester implements BasicProcessor { + @Override + public ProcessResult process(TaskContext context) throws Exception { + int i = 0; + while (true) { + // uninterruptible + i++; + if (i % 1000000000 == 0){ + log.info("taskInstance({}) is running ...",context.getInstanceId()); + } + } + } +} diff --git a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/workflow/WorkflowStandaloneProcessor.java b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/workflow/WorkflowStandaloneProcessor.java index 02e51dd9..8fdb93e9 100644 --- a/powerjob-worker-samples/src/main/java/tech/powerjob/samples/workflow/WorkflowStandaloneProcessor.java +++ b/powerjob-worker-samples/src/main/java/tech/powerjob/samples/workflow/WorkflowStandaloneProcessor.java @@ -1,6 +1,7 @@ package 
tech.powerjob.samples.workflow; import com.alibaba.fastjson.JSON; +import lombok.extern.slf4j.Slf4j; import tech.powerjob.worker.core.processor.ProcessResult; import tech.powerjob.worker.core.processor.TaskContext; import tech.powerjob.worker.core.processor.sdk.BasicProcessor; @@ -16,20 +17,20 @@ import java.util.Map; * @since 2020/6/2 */ @Component +@Slf4j public class WorkflowStandaloneProcessor implements BasicProcessor { @Override public ProcessResult process(TaskContext context) throws Exception { OmsLogger logger = context.getOmsLogger(); - logger.info("current:" + context.getJobParams()); - System.out.println("jobParams: " + context.getJobParams()); - System.out.println("currentContext:"+JSON.toJSONString(context)); + logger.info("current jobParams: {}", context.getJobParams()); + logger.info("current context: {}", context.getWorkflowContext()); + log.info("jobParams:{}", context.getJobParams()); + log.info("currentContext:{}", JSON.toJSONString(context)); // 尝试获取上游任务 Map workflowContext = context.getWorkflowContext().fetchWorkflowContext(); - System.out.println("工作流上下文数据:"); - System.out.println(workflowContext); - + log.info("工作流上下文数据:{}", workflowContext); return new ProcessResult(true, context.getJobId() + " process successfully."); } } diff --git a/powerjob-worker-spring-boot-starter/pom.xml b/powerjob-worker-spring-boot-starter/pom.xml index 7de6e43f..cc79470f 100644 --- a/powerjob-worker-spring-boot-starter/pom.xml +++ b/powerjob-worker-spring-boot-starter/pom.xml @@ -10,11 +10,11 @@ 4.0.0 powerjob-worker-spring-boot-starter - 4.2.0 + 4.2.1 jar - 4.2.0 + 4.2.1 2.7.4 diff --git a/powerjob-worker-spring-boot-starter/src/main/java/tech/powerjob/worker/autoconfigure/PowerJobAutoConfiguration.java b/powerjob-worker-spring-boot-starter/src/main/java/tech/powerjob/worker/autoconfigure/PowerJobAutoConfiguration.java index ccd1617f..a5e079c2 100644 --- 
a/powerjob-worker-spring-boot-starter/src/main/java/tech/powerjob/worker/autoconfigure/PowerJobAutoConfiguration.java +++ b/powerjob-worker-spring-boot-starter/src/main/java/tech/powerjob/worker/autoconfigure/PowerJobAutoConfiguration.java @@ -69,13 +69,17 @@ public class PowerJobAutoConfiguration { */ config.setEnableTestMode(worker.isEnableTestMode()); /* - * Max length of appended workflow context . Appended workflow context value that is longer than the value will be ignore. + * Max length of appended workflow context . Appended workflow context value that is longer than the value will be ignored. */ config.setMaxAppendedWfContextLength(worker.getMaxAppendedWfContextLength()); - /* - * Worker Tag - */ + config.setTag(worker.getTag()); + + config.setMaxHeavyweightTaskNum(worker.getMaxHeavyweightTaskNum()); + + config.setMaxLightweightTaskNum(worker.getMaxLightweightTaskNum()); + + config.setHealthReportInterval(worker.getHealthReportInterval()); /* * Create OhMyWorker object and set properties. */ diff --git a/powerjob-worker-spring-boot-starter/src/main/java/tech/powerjob/worker/autoconfigure/PowerJobProperties.java b/powerjob-worker-spring-boot-starter/src/main/java/tech/powerjob/worker/autoconfigure/PowerJobProperties.java index d58f86d1..4074c5ff 100644 --- a/powerjob-worker-spring-boot-starter/src/main/java/tech/powerjob/worker/autoconfigure/PowerJobProperties.java +++ b/powerjob-worker-spring-boot-starter/src/main/java/tech/powerjob/worker/autoconfigure/PowerJobProperties.java @@ -90,17 +90,6 @@ public class PowerJobProperties { getWorker().setEnableTestMode(enableTestMode); } - @Deprecated - @DeprecatedConfigurationProperty(replacement = "powerjob.worker.tag") - public String getTag() { - return getWorker().tag; - } - - @Deprecated - public void setTag(String tag) { - getWorker().setTag(tag); - } - /** * Powerjob worker configuration properties. 
*/ @@ -147,17 +136,28 @@ public class PowerJobProperties { private int maxResultLength = 8192; /** * If test mode is set as true, Powerjob-worker no longer connects to the server or validates appName. - * Test mode is used for conditions that your have no powerjob-server in your develop env so you can't startup the application + * Test mode is used for conditions that your have no powerjob-server in your develop env, so you can't start up the application */ private boolean enableTestMode = false; /** - * Max length of appended workflow context value length. Appended workflow context value that is longer than the value will be ignore. + * Max length of appended workflow context value length. Appended workflow context value that is longer than the value will be ignored. * {@link WorkflowContext} max length for #appendedContextData */ private int maxAppendedWfContextLength = 8192; - /** - * Worker Tag - */ + private String tag; + /** + * Max numbers of LightTaskTacker + */ + private Integer maxLightweightTaskNum = 1024; + /** + * Max numbers of HeavyTaskTacker + */ + private Integer maxHeavyweightTaskNum = 64; + /** + * Interval(s) of worker health report + */ + private Integer healthReportInterval = 10; + } } diff --git a/powerjob-worker/pom.xml b/powerjob-worker/pom.xml index b45481af..be2e8535 100644 --- a/powerjob-worker/pom.xml +++ b/powerjob-worker/pom.xml @@ -10,17 +10,17 @@ 4.0.0 powerjob-worker - 4.2.0 + 4.2.1 jar 5.3.23 - 4.2.0 - 1.4.200 - 3.4.2 - 5.6.1 + 4.2.1 + 2.1.214 + 4.0.3 + 5.9.1 - 1.2.3 + 1.2.9 @@ -45,7 +45,7 @@ h2 ${h2.db.version} - + com.zaxxer HikariCP diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/PowerJobWorker.java b/powerjob-worker/src/main/java/tech/powerjob/worker/PowerJobWorker.java index b55fd18a..ef88197f 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/PowerJobWorker.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/PowerJobWorker.java @@ -5,28 +5,8 @@ import akka.actor.ActorSystem; import 
akka.actor.DeadLetter; import akka.actor.Props; import akka.routing.RoundRobinPool; -import tech.powerjob.common.exception.PowerJobException; -import tech.powerjob.common.RemoteConstant; -import tech.powerjob.common.response.ResultDTO; -import tech.powerjob.common.utils.CommonUtils; -import tech.powerjob.common.utils.HttpUtils; -import tech.powerjob.common.serialize.JsonUtils; -import tech.powerjob.common.utils.NetUtils; -import tech.powerjob.worker.actors.ProcessorTrackerActor; -import tech.powerjob.worker.actors.TaskTrackerActor; -import tech.powerjob.worker.actors.TroubleshootingActor; -import tech.powerjob.worker.actors.WorkerActor; -import tech.powerjob.worker.background.OmsLogHandler; -import tech.powerjob.worker.background.ServerDiscoveryService; -import tech.powerjob.worker.background.WorkerHealthReporter; -import tech.powerjob.worker.common.PowerJobWorkerConfig; -import tech.powerjob.worker.common.PowerBannerPrinter; -import tech.powerjob.worker.common.WorkerRuntime; -import tech.powerjob.worker.common.utils.SpringUtils; -import tech.powerjob.worker.persistence.TaskPersistenceService; import com.google.common.base.Stopwatch; import com.google.common.collect.Maps; -import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; import lombok.extern.slf4j.Slf4j; @@ -35,12 +15,29 @@ import org.springframework.beans.factory.DisposableBean; import org.springframework.beans.factory.InitializingBean; import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContextAware; +import tech.powerjob.common.RemoteConstant; +import tech.powerjob.common.exception.PowerJobException; +import tech.powerjob.common.response.ResultDTO; +import tech.powerjob.common.serialize.JsonUtils; +import tech.powerjob.common.utils.CommonUtils; +import tech.powerjob.common.utils.HttpUtils; +import tech.powerjob.common.utils.NetUtils; +import 
tech.powerjob.worker.actors.ProcessorTrackerActor; +import tech.powerjob.worker.actors.TaskTrackerActor; +import tech.powerjob.worker.actors.TroubleshootingActor; +import tech.powerjob.worker.actors.WorkerActor; +import tech.powerjob.worker.background.OmsLogHandler; +import tech.powerjob.worker.background.ServerDiscoveryService; +import tech.powerjob.worker.background.WorkerHealthReporter; +import tech.powerjob.worker.common.PowerBannerPrinter; +import tech.powerjob.worker.common.PowerJobWorkerConfig; +import tech.powerjob.worker.common.WorkerRuntime; +import tech.powerjob.worker.common.utils.SpringUtils; +import tech.powerjob.worker.core.executor.ExecutorManager; +import tech.powerjob.worker.persistence.TaskPersistenceService; import java.util.Map; import java.util.Objects; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -53,8 +50,8 @@ import java.util.concurrent.atomic.AtomicBoolean; @Slf4j public class PowerJobWorker implements ApplicationContextAware, InitializingBean, DisposableBean { - private ScheduledExecutorService timingPool; private final WorkerRuntime workerRuntime = new WorkerRuntime(); + private final AtomicBoolean initialized = new AtomicBoolean(); @Override @@ -78,6 +75,10 @@ public class PowerJobWorker implements ApplicationContextAware, InitializingBean log.info("[PowerJobWorker] start to initialize PowerJobWorker..."); PowerJobWorkerConfig config = workerRuntime.getWorkerConfig(); + + // 打印 worker 配置 + log.info("[PowerJobWorker] worker config: {}", JsonUtils.toJSONString(config)); + CommonUtils.requireNonNull(config, "can't find OhMyConfig, please set OhMyConfig first"); try { @@ -85,7 +86,7 @@ public class PowerJobWorker implements ApplicationContextAware, InitializingBean // 校验 appName if (!config.isEnableTestMode()) { assertAppName(); - }else { + } else { 
log.warn("[PowerJobWorker] using TestMode now, it's dangerous if this is production env."); } @@ -93,14 +94,9 @@ public class PowerJobWorker implements ApplicationContextAware, InitializingBean String workerAddress = NetUtils.getLocalHost() + ":" + config.getPort(); workerRuntime.setWorkerAddress(workerAddress); - // 初始化定时线程池 - ThreadFactory timingPoolFactory = new ThreadFactoryBuilder().setNameFormat("oms-worker-timing-pool-%d").build(); - timingPool = Executors.newScheduledThreadPool(3, timingPoolFactory); - - // 连接 server - ServerDiscoveryService serverDiscoveryService = new ServerDiscoveryService(workerRuntime.getAppId(), workerRuntime.getWorkerConfig()); - serverDiscoveryService.start(timingPool); - workerRuntime.setServerDiscoveryService(serverDiscoveryService); + // 初始化 线程池 + final ExecutorManager executorManager = new ExecutorManager(workerRuntime.getWorkerConfig()); + workerRuntime.setExecutorManager(executorManager); // 初始化 ActorSystem(macOS上 new ServerSocket 检测端口占用的方法并不生效,可能是AKKA是Scala写的缘故?没办法...只能靠异常重试了) Map overrideConfig = Maps.newHashMap(); @@ -114,6 +110,12 @@ public class PowerJobWorker implements ApplicationContextAware, InitializingBean ActorSystem actorSystem = ActorSystem.create(RemoteConstant.WORKER_ACTOR_SYSTEM_NAME, akkaFinalConfig); workerRuntime.setActorSystem(actorSystem); + // 连接 server + ServerDiscoveryService serverDiscoveryService = new ServerDiscoveryService(workerRuntime.getAppId(), workerRuntime.getWorkerConfig()); + + serverDiscoveryService.start(workerRuntime.getExecutorManager().getCoreExecutor()); + workerRuntime.setServerDiscoveryService(serverDiscoveryService); + ActorRef taskTrackerActorRef = actorSystem.actorOf(TaskTrackerActor.props(workerRuntime) .withDispatcher("akka.task-tracker-dispatcher") .withRouter(new RoundRobinPool(cores * 2)), RemoteConstant.TASK_TRACKER_ACTOR_NAME); @@ -141,9 +143,10 @@ public class PowerJobWorker implements ApplicationContextAware, InitializingBean 
workerRuntime.setTaskPersistenceService(taskPersistenceService); log.info("[PowerJobWorker] local storage initialized successfully."); + // 初始化定时任务 - timingPool.scheduleAtFixedRate(new WorkerHealthReporter(workerRuntime), 0, 15, TimeUnit.SECONDS); - timingPool.scheduleWithFixedDelay(omsLogHandler.logSubmitter, 0, 5, TimeUnit.SECONDS); + workerRuntime.getExecutorManager().getCoreExecutor().scheduleAtFixedRate(new WorkerHealthReporter(workerRuntime), 0, config.getHealthReportInterval(), TimeUnit.SECONDS); + workerRuntime.getExecutorManager().getCoreExecutor().scheduleWithFixedDelay(omsLogHandler.logSubmitter, 0, 5, TimeUnit.SECONDS); log.info("[PowerJobWorker] PowerJobWorker initialized successfully, using time: {}, congratulations!", stopwatch); }catch (Exception e) { @@ -190,7 +193,7 @@ public class PowerJobWorker implements ApplicationContextAware, InitializingBean @Override public void destroy() throws Exception { - timingPool.shutdownNow(); + workerRuntime.getExecutorManager().shutdown(); workerRuntime.getActorSystem().terminate(); } } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/actors/ProcessorTrackerActor.java b/powerjob-worker/src/main/java/tech/powerjob/worker/actors/ProcessorTrackerActor.java index 12893774..3c0bffbc 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/actors/ProcessorTrackerActor.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/actors/ProcessorTrackerActor.java @@ -4,7 +4,7 @@ import akka.actor.AbstractActor; import akka.actor.Props; import tech.powerjob.worker.common.WorkerRuntime; import tech.powerjob.worker.core.tracker.processor.ProcessorTracker; -import tech.powerjob.worker.core.tracker.processor.ProcessorTrackerPool; +import tech.powerjob.worker.core.tracker.manager.ProcessorTrackerManager; import tech.powerjob.worker.persistence.TaskDO; import tech.powerjob.worker.pojo.request.TaskTrackerStartTaskReq; import tech.powerjob.worker.pojo.request.TaskTrackerStopInstanceReq; @@ -48,7 +48,7 @@ 
public class ProcessorTrackerActor extends AbstractActor { Long instanceId = req.getInstanceInfo().getInstanceId(); // 创建 ProcessorTracker 一定能成功 - ProcessorTracker processorTracker = ProcessorTrackerPool.getProcessorTracker( + ProcessorTracker processorTracker = ProcessorTrackerManager.getProcessorTracker( instanceId, req.getTaskTrackerAddress(), () -> new ProcessorTracker(req, workerRuntime)); @@ -71,7 +71,7 @@ public class ProcessorTrackerActor extends AbstractActor { private void onReceiveTaskTrackerStopInstanceReq(TaskTrackerStopInstanceReq req) { Long instanceId = req.getInstanceId(); - List removedPts = ProcessorTrackerPool.removeProcessorTracker(instanceId); + List removedPts = ProcessorTrackerManager.removeProcessorTracker(instanceId); if (!CollectionUtils.isEmpty(removedPts)) { removedPts.forEach(ProcessorTracker::destroy); } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/actors/TaskTrackerActor.java b/powerjob-worker/src/main/java/tech/powerjob/worker/actors/TaskTrackerActor.java index a7aa547f..c95a585b 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/actors/TaskTrackerActor.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/actors/TaskTrackerActor.java @@ -2,22 +2,27 @@ package tech.powerjob.worker.actors; import akka.actor.AbstractActor; import akka.actor.Props; +import com.google.common.collect.Lists; +import lombok.AllArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import tech.powerjob.common.enums.ExecuteType; +import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.model.InstanceDetail; import tech.powerjob.common.request.ServerQueryInstanceStatusReq; import tech.powerjob.common.request.ServerScheduleJobReq; import tech.powerjob.common.request.ServerStopInstanceReq; +import tech.powerjob.common.response.AskResponse; import tech.powerjob.worker.common.WorkerRuntime; import tech.powerjob.worker.common.constants.TaskStatus; +import 
tech.powerjob.worker.core.tracker.manager.HeavyTaskTrackerManager; +import tech.powerjob.worker.core.tracker.manager.LightTaskTrackerManager; import tech.powerjob.worker.core.tracker.task.TaskTracker; -import tech.powerjob.worker.core.tracker.task.TaskTrackerPool; +import tech.powerjob.worker.core.tracker.task.heavy.HeavyTaskTracker; +import tech.powerjob.worker.core.tracker.task.light.LightTaskTracker; import tech.powerjob.worker.persistence.TaskDO; import tech.powerjob.worker.pojo.request.ProcessorMapTaskRequest; import tech.powerjob.worker.pojo.request.ProcessorReportTaskStatusReq; -import tech.powerjob.common.response.AskResponse; import tech.powerjob.worker.pojo.request.ProcessorTrackerStatusReportReq; -import com.google.common.collect.Lists; -import lombok.AllArgsConstructor; -import lombok.extern.slf4j.Slf4j; import java.util.List; @@ -57,10 +62,10 @@ public class TaskTrackerActor extends AbstractActor { private void onReceiveProcessorReportTaskStatusReq(ProcessorReportTaskStatusReq req) { int taskStatus = req.getStatus(); - TaskTracker taskTracker = TaskTrackerPool.getTaskTrackerPool(req.getInstanceId()); - + // 只有重量级任务才会有两级任务状态上报的机制 + HeavyTaskTracker taskTracker = HeavyTaskTrackerManager.getTaskTracker(req.getInstanceId()); // 结束状态需要回复接受成功 - if (TaskStatus.finishedStatus.contains(taskStatus)) { + if (TaskStatus.FINISHED_STATUS.contains(taskStatus)) { AskResponse askResponse = AskResponse.succeed(null); getSender().tell(askResponse, getSelf()); } @@ -86,7 +91,7 @@ public class TaskTrackerActor extends AbstractActor { */ private void onReceiveProcessorMapTaskRequest(ProcessorMapTaskRequest req) { - TaskTracker taskTracker = TaskTrackerPool.getTaskTrackerPool(req.getInstanceId()); + HeavyTaskTracker taskTracker = HeavyTaskTrackerManager.getTaskTracker(req.getInstanceId()); if (taskTracker == null) { log.warn("[TaskTrackerActor] receive ProcessorMapTaskRequest({}) but system can't find TaskTracker.", req); return; @@ -123,17 +128,41 @@ public class 
TaskTrackerActor extends AbstractActor { * 服务器任务调度处理器 */ private void onReceiveServerScheduleJobReq(ServerScheduleJobReq req) { - Long instanceId = req.getInstanceId(); - TaskTracker taskTracker = TaskTrackerPool.getTaskTrackerPool(instanceId); - - if (taskTracker != null) { - log.warn("[TaskTrackerActor] TaskTracker({}) for instance(id={}) already exists.", taskTracker, instanceId); - return; - } - log.debug("[TaskTrackerActor] server schedule job by request: {}.", req); - // 原子创建,防止多实例的存在 - TaskTrackerPool.atomicCreateTaskTracker(instanceId, ignore -> TaskTracker.create(req, workerRuntime)); + Long instanceId = req.getInstanceId(); + // 区分轻量级任务模型以及重量级任务模型 + if (isLightweightTask(req)) { + final LightTaskTracker taskTracker = LightTaskTrackerManager.getTaskTracker(instanceId); + if (taskTracker != null) { + log.warn("[TaskTrackerActor] LightTaskTracker({}) for instance(id={}) already exists.", taskTracker, instanceId); + return; + } + // 判断是否已经 overload + if (LightTaskTrackerManager.currentTaskTrackerSize() >= workerRuntime.getWorkerConfig().getMaxLightweightTaskNum() * LightTaskTrackerManager.OVERLOAD_FACTOR) { + // ignore this request + log.warn("[TaskTrackerActor] this worker is overload,ignore this request(instanceId={}),current size = {}!",instanceId,LightTaskTrackerManager.currentTaskTrackerSize()); + return; + } + if (LightTaskTrackerManager.currentTaskTrackerSize() >= workerRuntime.getWorkerConfig().getMaxLightweightTaskNum()) { + log.warn("[TaskTrackerActor] this worker will be overload soon,current size = {}!",LightTaskTrackerManager.currentTaskTrackerSize()); + } + // 创建轻量级任务 + LightTaskTrackerManager.atomicCreateTaskTracker(instanceId, ignore -> LightTaskTracker.create(req, workerRuntime)); + } else { + HeavyTaskTracker taskTracker = HeavyTaskTrackerManager.getTaskTracker(instanceId); + if (taskTracker != null) { + log.warn("[TaskTrackerActor] HeavyTaskTracker({}) for instance(id={}) already exists.", taskTracker, instanceId); + return; + } + // 判断是否已经 
overload + if (HeavyTaskTrackerManager.currentTaskTrackerSize() >= workerRuntime.getWorkerConfig().getMaxHeavyweightTaskNum()) { + // ignore this request + log.warn("[TaskTrackerActor] this worker is overload,ignore this request(instanceId={})! current size = {},", instanceId, HeavyTaskTrackerManager.currentTaskTrackerSize()); + return; + } + // 原子创建,防止多实例的存在 + HeavyTaskTrackerManager.atomicCreateTaskTracker(instanceId, ignore -> HeavyTaskTracker.create(req, workerRuntime)); + } } /** @@ -141,7 +170,7 @@ public class TaskTrackerActor extends AbstractActor { */ private void onReceiveProcessorTrackerStatusReportReq(ProcessorTrackerStatusReportReq req) { - TaskTracker taskTracker = TaskTrackerPool.getTaskTrackerPool(req.getInstanceId()); + HeavyTaskTracker taskTracker = HeavyTaskTrackerManager.getTaskTracker(req.getInstanceId()); if (taskTracker == null) { log.warn("[TaskTrackerActor] receive ProcessorTrackerStatusReportReq({}) but system can't find TaskTracker.", req); return; @@ -153,12 +182,20 @@ public class TaskTrackerActor extends AbstractActor { * 停止任务实例 */ private void onReceiveServerStopInstanceReq(ServerStopInstanceReq req) { - TaskTracker taskTracker = TaskTrackerPool.getTaskTrackerPool(req.getInstanceId()); - if (taskTracker == null) { - log.warn("[TaskTrackerActor] receive ServerStopInstanceReq({}) but system can't find TaskTracker.", req); + + + log.info("[TaskTrackerActor] receive ServerStopInstanceReq({}).", req); + HeavyTaskTracker heavyTaskTracker = HeavyTaskTrackerManager.getTaskTracker(req.getInstanceId()); + if (heavyTaskTracker != null) { + heavyTaskTracker.stopTask(); return; } - taskTracker.destroy(); + LightTaskTracker lightTaskTracker = LightTaskTrackerManager.getTaskTracker(req.getInstanceId()); + if (lightTaskTracker != null) { + lightTaskTracker.stopTask(); + return; + } + log.warn("[TaskTrackerActor] receive ServerStopInstanceReq({}) but system can't find TaskTracker.", req); } /** @@ -166,14 +203,26 @@ public class TaskTrackerActor 
extends AbstractActor { */ private void onReceiveServerQueryInstanceStatusReq(ServerQueryInstanceStatusReq req) { AskResponse askResponse; - TaskTracker taskTracker = TaskTrackerPool.getTaskTrackerPool(req.getInstanceId()); - if (taskTracker == null) { + TaskTracker taskTracker = HeavyTaskTrackerManager.getTaskTracker(req.getInstanceId()); + if (taskTracker == null && (taskTracker = LightTaskTrackerManager.getTaskTracker(req.getInstanceId())) == null) { log.warn("[TaskTrackerActor] receive ServerQueryInstanceStatusReq({}) but system can't find TaskTracker.", req); askResponse = AskResponse.failed("can't find TaskTracker"); - }else { + } else { InstanceDetail instanceDetail = taskTracker.fetchRunningStatus(); askResponse = AskResponse.succeed(instanceDetail); } getSender().tell(askResponse, getSelf()); } + + + private boolean isLightweightTask(ServerScheduleJobReq serverScheduleJobReq) { + final ExecuteType executeType = ExecuteType.valueOf(serverScheduleJobReq.getExecuteType()); + // 非单机执行的一定不是 + if (executeType != ExecuteType.STANDALONE){ + return false; + } + TimeExpressionType timeExpressionType = TimeExpressionType.valueOf(serverScheduleJobReq.getTimeExpressionType()); + // 固定频率以及固定延迟的也一定不是 + return timeExpressionType != TimeExpressionType.FIXED_DELAY && timeExpressionType != TimeExpressionType.FIXED_RATE; + } } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/background/OmsLogHandler.java b/powerjob-worker/src/main/java/tech/powerjob/worker/background/OmsLogHandler.java index ad4f51ba..54010994 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/background/OmsLogHandler.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/background/OmsLogHandler.java @@ -9,7 +9,7 @@ import tech.powerjob.worker.common.utils.AkkaUtils; import com.google.common.collect.Lists; import com.google.common.collect.Queues; import lombok.extern.slf4j.Slf4j; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; 
import java.util.List; import java.util.concurrent.BlockingQueue; diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/background/ServerDiscoveryService.java b/powerjob-worker/src/main/java/tech/powerjob/worker/background/ServerDiscoveryService.java index 35e1c9a0..7e25a62f 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/background/ServerDiscoveryService.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/background/ServerDiscoveryService.java @@ -1,17 +1,17 @@ package tech.powerjob.worker.background; -import tech.powerjob.common.exception.PowerJobException; -import tech.powerjob.common.response.ResultDTO; -import tech.powerjob.common.utils.CommonUtils; -import tech.powerjob.common.serialize.JsonUtils; -import tech.powerjob.common.utils.HttpUtils; -import tech.powerjob.worker.common.PowerJobWorkerConfig; -import tech.powerjob.worker.core.tracker.task.TaskTracker; -import tech.powerjob.worker.core.tracker.task.TaskTrackerPool; import com.google.common.collect.Maps; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.springframework.util.CollectionUtils; +import tech.powerjob.common.exception.PowerJobException; +import tech.powerjob.common.response.ResultDTO; +import tech.powerjob.common.serialize.JsonUtils; +import tech.powerjob.common.utils.CommonUtils; +import tech.powerjob.common.utils.HttpUtils; +import tech.powerjob.worker.common.PowerJobWorkerConfig; +import tech.powerjob.worker.core.tracker.task.heavy.HeavyTaskTracker; +import tech.powerjob.worker.core.tracker.manager.HeavyTaskTrackerManager; import java.util.List; import java.util.Map; @@ -34,13 +34,20 @@ public class ServerDiscoveryService { private final Map ip2Address = Maps.newHashMap(); - // 服务发现地址 + /** + * 服务发现地址 + */ private static final String DISCOVERY_URL = "http://%s/server/acquire?appId=%d¤tServer=%s&protocol=AKKA"; - // 失败次数 + /** + * 失败次数 + */ private static int FAILED_COUNT = 0; - // 最大失败次数 + /** + * 最大失败次数 + */ 
private static final int MAX_FAILED_COUNT = 3; + public ServerDiscoveryService(Long appId, PowerJobWorkerConfig config) { this.appId = appId; this.config = config; @@ -51,7 +58,15 @@ public class ServerDiscoveryService { if (org.springframework.util.StringUtils.isEmpty(this.currentServerAddress) && !config.isEnableTestMode()) { throw new PowerJobException("can't find any available server, this worker has been quarantined."); } - timingPool.scheduleAtFixedRate(() -> this.currentServerAddress = discovery(), 10, 10, TimeUnit.SECONDS); + // 这里必须保证成功 + timingPool.scheduleAtFixedRate(() -> { + try { + this.currentServerAddress = discovery(); + } catch (Exception e) { + log.error("[PowerDiscovery] fail to discovery server!", e); + } + } + , 10, 10, TimeUnit.SECONDS); } public String getCurrentServerAddress() { @@ -87,16 +102,16 @@ public class ServerDiscoveryService { } if (StringUtils.isEmpty(result)) { - log.warn("[PowerDiscovery] can't find any available server, this worker[appId={}] has been quarantined.", appId); + log.warn("[PowerDiscovery] can't find any available server, this worker has been quarantined."); // 在 Server 高可用的前提下,连续失败多次,说明该节点与外界失联,Server已经将秒级任务转移到其他Worker,需要杀死本地的任务 if (FAILED_COUNT++ > MAX_FAILED_COUNT) { log.warn("[PowerDiscovery] can't find any available server for 3 consecutive times, It's time to kill all frequent job in this worker."); - List frequentInstanceIds = TaskTrackerPool.getAllFrequentTaskTrackerKeys(); + List frequentInstanceIds = HeavyTaskTrackerManager.getAllFrequentTaskTrackerKeys(); if (!CollectionUtils.isEmpty(frequentInstanceIds)) { frequentInstanceIds.forEach(instanceId -> { - TaskTracker taskTracker = TaskTrackerPool.remove(instanceId); + HeavyTaskTracker taskTracker = HeavyTaskTrackerManager.removeTaskTracker(instanceId); taskTracker.destroy(); log.warn("[PowerDiscovery] kill frequent instance(instanceId={}) due to can't find any available server.", instanceId); }); @@ -108,7 +123,7 @@ public class ServerDiscoveryService { } 
else { // 重置失败次数 FAILED_COUNT = 0; - log.debug("[PowerDiscovery] appId={}, current server is {}.", appId, result); + log.debug("[PowerDiscovery] current server is {}.", result); return result; } } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/background/WorkerHealthReporter.java b/powerjob-worker/src/main/java/tech/powerjob/worker/background/WorkerHealthReporter.java index 80bc4265..50d50f20 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/background/WorkerHealthReporter.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/background/WorkerHealthReporter.java @@ -1,6 +1,7 @@ package tech.powerjob.worker.background; import akka.actor.ActorSelection; +import lombok.RequiredArgsConstructor; import tech.powerjob.common.enums.Protocol; import tech.powerjob.common.model.SystemMetrics; import tech.powerjob.common.request.WorkerHeartbeat; @@ -9,9 +10,11 @@ import tech.powerjob.worker.common.WorkerRuntime; import tech.powerjob.worker.common.utils.AkkaUtils; import tech.powerjob.worker.common.utils.SystemInfoUtils; import tech.powerjob.worker.container.OmsContainerFactory; -import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.util.StringUtils; +import tech.powerjob.worker.core.tracker.manager.HeavyTaskTrackerManager; +import tech.powerjob.worker.core.tracker.manager.LightTaskTrackerManager; + /** * Worker健康度定时上报 @@ -20,7 +23,7 @@ import org.springframework.util.StringUtils; * @since 2020/3/25 */ @Slf4j -@AllArgsConstructor +@RequiredArgsConstructor public class WorkerHealthReporter implements Runnable { private final WorkerRuntime workerRuntime; @@ -31,6 +34,7 @@ public class WorkerHealthReporter implements Runnable { // 没有可用Server,无法上报 String currentServer = workerRuntime.getServerDiscoveryService().getCurrentServerAddress(); if (StringUtils.isEmpty(currentServer)) { + log.warn("[WorkerHealthReporter] no available server,fail to report health info!"); return; } @@ -54,14 +58,30 @@ public 
class WorkerHealthReporter implements Runnable { heartbeat.setClient("Atlantis"); heartbeat.setTag(workerRuntime.getWorkerConfig().getTag()); + // 上报 Tracker 数量 + heartbeat.setLightTaskTrackerNum(LightTaskTrackerManager.currentTaskTrackerSize()); + heartbeat.setHeavyTaskTrackerNum(HeavyTaskTrackerManager.currentTaskTrackerSize()); + // 是否超载 + if (workerRuntime.getWorkerConfig().getMaxLightweightTaskNum() <= LightTaskTrackerManager.currentTaskTrackerSize() || workerRuntime.getWorkerConfig().getMaxHeavyweightTaskNum() <= HeavyTaskTrackerManager.currentTaskTrackerSize()){ + heartbeat.setOverload(true); + } // 获取当前加载的容器列表 heartbeat.setContainerInfos(OmsContainerFactory.getDeployedContainerInfos()); - // 发送请求 String serverPath = AkkaUtils.getServerActorPath(currentServer); if (StringUtils.isEmpty(serverPath)) { return; } + // log + log.info("[WorkerHealthReporter] report health status,appId:{},appName:{},isOverload:{},maxLightweightTaskNum:{},currentLightweightTaskNum:{},maxHeavyweightTaskNum:{},currentHeavyweightTaskNum:{}" , + heartbeat.getAppId(), + heartbeat.getAppName(), + heartbeat.isOverload(), + workerRuntime.getWorkerConfig().getMaxLightweightTaskNum(), + heartbeat.getLightTaskTrackerNum(), + workerRuntime.getWorkerConfig().getMaxHeavyweightTaskNum(), + heartbeat.getHeavyTaskTrackerNum() + ); ActorSelection actorSelection = workerRuntime.getActorSystem().actorSelection(serverPath); actorSelection.tell(heartbeat, null); } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/common/PowerJobWorkerConfig.java b/powerjob-worker/src/main/java/tech/powerjob/worker/common/PowerJobWorkerConfig.java index 2b7494dd..961aacec 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/common/PowerJobWorkerConfig.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/common/PowerJobWorkerConfig.java @@ -1,13 +1,13 @@ package tech.powerjob.worker.common; +import com.google.common.collect.Lists; +import lombok.Getter; +import lombok.Setter; import 
tech.powerjob.common.RemoteConstant; import tech.powerjob.worker.common.constants.StoreStrategy; import tech.powerjob.worker.core.processor.ProcessResult; import tech.powerjob.worker.core.processor.WorkflowContext; import tech.powerjob.worker.extension.SystemMetricsCollector; -import com.google.common.collect.Lists; -import lombok.Getter; -import lombok.Setter; import java.util.List; @@ -65,5 +65,17 @@ public class PowerJobWorkerConfig { private SystemMetricsCollector systemMetricsCollector; private String tag; + /** + * Max numbers of LightTaskTacker + */ + private Integer maxLightweightTaskNum = 1024; + /** + * Max numbers of HeavyTaskTacker + */ + private Integer maxHeavyweightTaskNum = 64; + /** + * Interval(s) of worker health report + */ + private Integer healthReportInterval = 10; } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/common/PowerJobWorkerVersion.java b/powerjob-worker/src/main/java/tech/powerjob/worker/common/PowerJobWorkerVersion.java index 6f49069a..1926ee79 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/common/PowerJobWorkerVersion.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/common/PowerJobWorkerVersion.java @@ -1,15 +1,7 @@ package tech.powerjob.worker.common; import org.apache.commons.lang3.StringUtils; - -import java.io.File; -import java.io.IOException; -import java.net.JarURLConnection; -import java.net.URL; -import java.net.URLConnection; -import java.security.CodeSource; -import java.util.jar.Attributes; -import java.util.jar.JarFile; +import tech.powerjob.common.utils.JavaUtils; /** * 获取 Worker 版本,便于开发者排查问题 @@ -29,36 +21,9 @@ public final class PowerJobWorkerVersion { */ public static String getVersion() { if (StringUtils.isEmpty(CACHE)) { - CACHE = determinePowerJobVersion(); + CACHE = JavaUtils.determinePackageVersion(PowerJobWorkerVersion.class); } return CACHE; } - private static String determinePowerJobVersion() { - String implementationVersion = 
PowerJobWorkerVersion.class.getPackage().getImplementationVersion(); - if (implementationVersion != null) { - return implementationVersion; - } - CodeSource codeSource = PowerJobWorkerVersion.class.getProtectionDomain().getCodeSource(); - if (codeSource == null) { - return null; - } - URL codeSourceLocation = codeSource.getLocation(); - try { - URLConnection connection = codeSourceLocation.openConnection(); - if (connection instanceof JarURLConnection) { - return getImplementationVersion(((JarURLConnection) connection).getJarFile()); - } - try (JarFile jarFile = new JarFile(new File(codeSourceLocation.toURI()))) { - return getImplementationVersion(jarFile); - } - } - catch (Exception ex) { - return null; - } - } - - private static String getImplementationVersion(JarFile jarFile) throws IOException { - return jarFile.getManifest().getMainAttributes().getValue(Attributes.Name.IMPLEMENTATION_VERSION); - } } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/common/WorkerRuntime.java b/powerjob-worker/src/main/java/tech/powerjob/worker/common/WorkerRuntime.java index cae7eced..f7b07c41 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/common/WorkerRuntime.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/common/WorkerRuntime.java @@ -3,6 +3,8 @@ package tech.powerjob.worker.common; import akka.actor.ActorSystem; import tech.powerjob.worker.background.OmsLogHandler; import tech.powerjob.worker.background.ServerDiscoveryService; +import tech.powerjob.worker.background.WorkerHealthReporter; +import tech.powerjob.worker.core.executor.ExecutorManager; import tech.powerjob.worker.persistence.TaskPersistenceService; import lombok.Data; @@ -17,12 +19,19 @@ public class WorkerRuntime { private Long appId; - private PowerJobWorkerConfig workerConfig; - private String workerAddress; + private PowerJobWorkerConfig workerConfig; + private ActorSystem actorSystem; + + private WorkerHealthReporter healthReporter; + + private ExecutorManager 
executorManager; + private OmsLogHandler omsLogHandler; + private ServerDiscoveryService serverDiscoveryService; + private TaskPersistenceService taskPersistenceService; } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/common/constants/TaskStatus.java b/powerjob-worker/src/main/java/tech/powerjob/worker/common/constants/TaskStatus.java index f16327a6..1a5ad6e2 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/common/constants/TaskStatus.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/common/constants/TaskStatus.java @@ -23,7 +23,7 @@ public enum TaskStatus { WORKER_PROCESS_FAILED(5, "worker执行失败"), WORKER_PROCESS_SUCCESS(6, "worker执行成功"); - public static final Set finishedStatus = Sets.newHashSet(WORKER_PROCESS_FAILED.value, WORKER_PROCESS_SUCCESS.value); + public static final Set FINISHED_STATUS = Sets.newHashSet(WORKER_PROCESS_FAILED.value, WORKER_PROCESS_SUCCESS.value); private final int value; private final String des; diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/common/utils/AkkaUtils.java b/powerjob-worker/src/main/java/tech/powerjob/worker/common/utils/AkkaUtils.java index 4193449e..c94d2f2f 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/common/utils/AkkaUtils.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/common/utils/AkkaUtils.java @@ -6,7 +6,7 @@ import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.response.AskResponse; import tech.powerjob.common.RemoteConstant; import lombok.extern.slf4j.Slf4j; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.time.Duration; import java.util.concurrent.CompletionStage; diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/container/OhMyClassLoader.java b/powerjob-worker/src/main/java/tech/powerjob/worker/container/OhMyClassLoader.java index d95a3a06..f010b47a 100644 --- 
a/powerjob-worker/src/main/java/tech/powerjob/worker/container/OhMyClassLoader.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/container/OhMyClassLoader.java @@ -1,7 +1,7 @@ package tech.powerjob.worker.container; import lombok.extern.slf4j.Slf4j; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.io.File; import java.net.URL; diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/container/OmsJarContainer.java b/powerjob-worker/src/main/java/tech/powerjob/worker/container/OmsJarContainer.java index caa1674d..2bd27648 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/container/OmsJarContainer.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/container/OmsJarContainer.java @@ -8,7 +8,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.FileUtils; import org.springframework.beans.BeansException; import org.springframework.context.support.ClassPathXmlApplicationContext; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.io.File; import java.io.InputStream; diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/ProcessorBeanFactory.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/ProcessorBeanFactory.java index 18315f1f..d86cdfd7 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/core/ProcessorBeanFactory.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/ProcessorBeanFactory.java @@ -16,8 +16,11 @@ import java.util.Map; @Slf4j public class ProcessorBeanFactory { - // key(用来防止不同jar包同名类的冲突) -> (className -> Processor) + /** + * key(用来防止不同jar包同名类的冲突) -> (className -> Processor) + */ private final Map> cache; + private static final String LOCAL_KEY = "local"; private static volatile ProcessorBeanFactory processorBeanFactory; diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/executor/ExecutorManager.java 
b/powerjob-worker/src/main/java/tech/powerjob/worker/core/executor/ExecutorManager.java new file mode 100644 index 00000000..8697070f --- /dev/null +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/executor/ExecutorManager.java @@ -0,0 +1,54 @@ +package tech.powerjob.worker.core.executor; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import lombok.Getter; +import tech.powerjob.worker.common.PowerJobWorkerConfig; + +import java.util.concurrent.*; + +/** + * @author Echo009 + * @since 2022/9/23 + */ +@Getter +public class ExecutorManager { + /** + * 执行 Worker 底层核心任务 + */ + private final ScheduledExecutorService coreExecutor; + /** + * 执行轻量级任务状态上报 + */ + private final ScheduledExecutorService lightweightTaskStatusCheckExecutor; + /** + * 执行轻量级任务 + */ + private final ExecutorService lightweightTaskExecutorService; + + + public ExecutorManager(PowerJobWorkerConfig workerConfig){ + + + final int availableProcessors = Runtime.getRuntime().availableProcessors(); + // 初始化定时线程池 + ThreadFactory coreThreadFactory = new ThreadFactoryBuilder().setNameFormat("powerjob-worker-core-%d").build(); + coreExecutor = new ScheduledThreadPoolExecutor(3, coreThreadFactory); + + ThreadFactory lightTaskReportFactory = new ThreadFactoryBuilder().setNameFormat("powerjob-worker-light-task-status-check-%d").build(); + // 都是 io 密集型任务 + lightweightTaskStatusCheckExecutor = new ScheduledThreadPoolExecutor(availableProcessors * 10, lightTaskReportFactory); + + ThreadFactory lightTaskExecuteFactory = new ThreadFactoryBuilder().setNameFormat("powerjob-worker-light-task-execute-%d").build(); + // 大部分任务都是 io 密集型 + lightweightTaskExecutorService = new ThreadPoolExecutor(availableProcessors * 10,availableProcessors * 10, 120L, TimeUnit.SECONDS, + new ArrayBlockingQueue<>((workerConfig.getMaxLightweightTaskNum() * 2),true), lightTaskExecuteFactory, new ThreadPoolExecutor.AbortPolicy()); + + } + + + + public void shutdown(){ + coreExecutor.shutdownNow(); + } + +} diff --git 
a/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessResult.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessResult.java index 295e841f..05d96f70 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessResult.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessResult.java @@ -16,6 +16,7 @@ import lombok.*; public class ProcessResult { private boolean success = false; + private String msg; public ProcessResult(boolean success) { diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessorInfo.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessorInfo.java new file mode 100644 index 00000000..a971911e --- /dev/null +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessorInfo.java @@ -0,0 +1,22 @@ +package tech.powerjob.worker.core.processor; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import tech.powerjob.worker.core.processor.sdk.BasicProcessor; + +/** + * @author Echo009 + * @since 2022/9/23 + */ +@RequiredArgsConstructor +@Getter +public class ProcessorInfo { + + private final BasicProcessor basicProcessor; + + private final ClassLoader classLoader; + + public static ProcessorInfo of(BasicProcessor basicProcessor, ClassLoader classLoader) { + return new ProcessorInfo(basicProcessor, classLoader); + } +} diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessorLoader.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessorLoader.java new file mode 100644 index 00000000..9256f337 --- /dev/null +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/ProcessorLoader.java @@ -0,0 +1,89 @@ +package tech.powerjob.worker.core.processor; + +import akka.actor.ActorSelection; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.exception.ExceptionUtils; +import 
tech.powerjob.common.enums.ProcessorType; +import tech.powerjob.common.exception.PowerJobException; +import tech.powerjob.worker.common.WorkerRuntime; +import tech.powerjob.worker.common.utils.AkkaUtils; +import tech.powerjob.worker.common.utils.SpringUtils; +import tech.powerjob.worker.container.OmsContainer; +import tech.powerjob.worker.container.OmsContainerFactory; +import tech.powerjob.worker.core.ProcessorBeanFactory; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * @author Echo009 + * @since 2022/9/19 + */ +@Slf4j +public class ProcessorLoader { + + + private static final Map CACHE; + + + static { + // init + CACHE = new ConcurrentHashMap<>(128); + } + + /** + * 获取处理器 + * @param workerRuntime 运行时 + * @param processorType 处理器类型 + * @param processorInfo 处理器 id ,一般是全限定类名 + * @return processor + */ + public static ProcessorInfo loadProcessor(WorkerRuntime workerRuntime, String processorType, String processorInfo) { + ProcessorInfo processorInfoHolder = null; + ProcessorType type = ProcessorType.valueOf(processorType); + + switch (type) { + case BUILT_IN: + // 先从缓存中取 + processorInfoHolder = CACHE.computeIfAbsent(processorInfo, ignore -> { + // 先使用 Spring 加载 + if (SpringUtils.supportSpringBean()) { + try { + return ProcessorInfo.of(SpringUtils.getBean(processorInfo),workerRuntime.getClass().getClassLoader()); + } catch (Exception e) { + log.warn("[ProcessorLoader] no spring bean of processor(className={}), reason is {}.", processorInfo, ExceptionUtils.getMessage(e)); + } + } + // 反射加载 + return ProcessorInfo.of(ProcessorBeanFactory.getInstance().getLocalProcessor(processorInfo),workerRuntime.getClass().getClassLoader()); + }); + break; + case EXTERNAL: + String[] split = processorInfo.split("#"); + log.info("[ProcessorLoader] try to load processor({}) in container({})", split[1], split[0]); + + String serverPath = AkkaUtils.getServerActorPath(workerRuntime.getServerDiscoveryService().getCurrentServerAddress()); + ActorSelection 
actorSelection = workerRuntime.getActorSystem().actorSelection(serverPath); + OmsContainer omsContainer = OmsContainerFactory.fetchContainer(Long.valueOf(split[0]), actorSelection); + if (omsContainer != null) { + processorInfoHolder = ProcessorInfo.of(omsContainer.getProcessor(split[1]), omsContainer.getContainerClassLoader()); + } else { + log.warn("[ProcessorLoader] load container failed. processor info : {}", processorInfo); + } + break; + default: + log.warn("[ProcessorLoader] unknown processor type: {}.", processorType); + throw new PowerJobException("unknown processor type of " + processorType); + } + + if (processorInfoHolder == null) { + log.warn("[ProcessorLoader] fetch Processor(type={},info={}) failed.", processorType, processorInfo); + throw new PowerJobException("fetch Processor failed, please check your processorType and processorInfo config"); + } + + return processorInfoHolder; + + } + + +} diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/executor/ProcessorRunnable.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/runnable/HeavyProcessorRunnable.java similarity index 98% rename from powerjob-worker/src/main/java/tech/powerjob/worker/core/executor/ProcessorRunnable.java rename to powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/runnable/HeavyProcessorRunnable.java index 8a82eca1..d42ed61b 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/core/executor/ProcessorRunnable.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/processor/runnable/HeavyProcessorRunnable.java @@ -1,4 +1,4 @@ -package tech.powerjob.worker.core.executor; +package tech.powerjob.worker.core.processor.runnable; import akka.actor.ActorSelection; import tech.powerjob.common.enums.ExecuteType; @@ -24,7 +24,7 @@ import com.google.common.base.Stopwatch; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.BeanUtils; -import 
org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.util.Collections; import java.util.List; @@ -41,7 +41,7 @@ import java.util.Queue; @Slf4j @AllArgsConstructor @SuppressWarnings("squid:S1181") -public class ProcessorRunnable implements Runnable { +public class HeavyProcessorRunnable implements Runnable { private final InstanceInfo instanceInfo; @@ -221,7 +221,7 @@ public class ProcessorRunnable implements Runnable { req.setAppendedWfContext(appendedWfContext); // 最终结束状态要求可靠发送 - if (TaskStatus.finishedStatus.contains(status.getValue())) { + if (TaskStatus.FINISHED_STATUS.contains(status.getValue())) { boolean success = AkkaUtils.reliableTransmit(taskTrackerActor, req); if (!success) { // 插入重试队列,等待重试 diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/manager/HeavyTaskTrackerManager.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/manager/HeavyTaskTrackerManager.java new file mode 100644 index 00000000..2fd4bd1e --- /dev/null +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/manager/HeavyTaskTrackerManager.java @@ -0,0 +1,49 @@ +package tech.powerjob.worker.core.tracker.manager; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import tech.powerjob.worker.core.tracker.task.heavy.FrequentTaskTracker; +import tech.powerjob.worker.core.tracker.task.heavy.HeavyTaskTracker; + +import java.util.List; +import java.util.Map; +import java.util.function.Function; + +/** + * 持有 TaskTracker 对象 + * + * @author tjq + * @since 2020/3/24 + */ +public class HeavyTaskTrackerManager { + + private static final Map INSTANCE_ID_2_TASK_TRACKER = Maps.newConcurrentMap(); + /** + * 获取 TaskTracker + */ + public static HeavyTaskTracker getTaskTracker(Long instanceId) { + return INSTANCE_ID_2_TASK_TRACKER.get(instanceId); + } + + public static HeavyTaskTracker removeTaskTracker(Long instanceId) { + return 
INSTANCE_ID_2_TASK_TRACKER.remove(instanceId); + } + + public static void atomicCreateTaskTracker(Long instanceId, Function creator) { + INSTANCE_ID_2_TASK_TRACKER.computeIfAbsent(instanceId, creator); + } + + public static List getAllFrequentTaskTrackerKeys() { + List keys = Lists.newLinkedList(); + INSTANCE_ID_2_TASK_TRACKER.forEach((key, tk) -> { + if (tk instanceof FrequentTaskTracker) { + keys.add(key); + } + }); + return keys; + } + + public static int currentTaskTrackerSize(){ + return INSTANCE_ID_2_TASK_TRACKER.size(); + } +} diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/manager/LightTaskTrackerManager.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/manager/LightTaskTrackerManager.java new file mode 100644 index 00000000..eef65f39 --- /dev/null +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/manager/LightTaskTrackerManager.java @@ -0,0 +1,36 @@ +package tech.powerjob.worker.core.tracker.manager; + +import com.google.common.collect.Maps; +import tech.powerjob.worker.core.tracker.task.light.LightTaskTracker; + +import java.util.Map; +import java.util.function.Function; + +/** + * @author Echo009 + * @since 2022/9/23 + */ +public class LightTaskTrackerManager { + + public static final double OVERLOAD_FACTOR = 1.3d; + + private static final Map INSTANCE_ID_2_TASK_TRACKER = Maps.newConcurrentMap(); + + + public static LightTaskTracker getTaskTracker(Long instanceId) { + return INSTANCE_ID_2_TASK_TRACKER.get(instanceId); + } + + public static LightTaskTracker removeTaskTracker(Long instanceId) { + return INSTANCE_ID_2_TASK_TRACKER.remove(instanceId); + } + + public static void atomicCreateTaskTracker(Long instanceId, Function creator) { + INSTANCE_ID_2_TASK_TRACKER.computeIfAbsent(instanceId, creator); + } + + public static int currentTaskTrackerSize(){ + return INSTANCE_ID_2_TASK_TRACKER.size(); + } + +} diff --git 
a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/processor/ProcessorTrackerPool.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/manager/ProcessorTrackerManager.java similarity index 59% rename from powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/processor/ProcessorTrackerPool.java rename to powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/manager/ProcessorTrackerManager.java index 643924f6..07e6478e 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/processor/ProcessorTrackerPool.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/manager/ProcessorTrackerManager.java @@ -1,7 +1,8 @@ -package tech.powerjob.worker.core.tracker.processor; +package tech.powerjob.worker.core.tracker.manager; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import tech.powerjob.worker.core.tracker.processor.ProcessorTracker; import java.util.Collections; import java.util.List; @@ -15,21 +16,23 @@ import java.util.function.Supplier; * @author tjq * @since 2020/3/20 */ -public class ProcessorTrackerPool { +public class ProcessorTrackerManager { - // instanceId -> (TaskTrackerAddress -> ProcessorTracker) - // 处理脑裂情况下同一个 Instance 存在多个 TaskTracker 的情况 - private static final Map> processorTrackerPool = Maps.newHashMap(); + /** + * instanceId -> (TaskTrackerAddress -> ProcessorTracker) + * 处理脑裂情况下同一个 Instance 存在多个 TaskTracker 的情况 + */ + private static final Map> PROCESSOR_TRACKER_CONTAINER = Maps.newHashMap(); /** * 获取 ProcessorTracker,如果不存在则创建 */ public static synchronized ProcessorTracker getProcessorTracker(Long instanceId, String address, Supplier creator) { - ProcessorTracker processorTracker = processorTrackerPool.getOrDefault(instanceId, Collections.emptyMap()).get(address); + ProcessorTracker processorTracker = PROCESSOR_TRACKER_CONTAINER.getOrDefault(instanceId, Collections.emptyMap()).get(address); if (processorTracker == null) { 
processorTracker = creator.get(); - processorTrackerPool.computeIfAbsent(instanceId, ignore -> Maps.newHashMap()).put(address, processorTracker); + PROCESSOR_TRACKER_CONTAINER.computeIfAbsent(instanceId, ignore -> Maps.newHashMap()).put(address, processorTracker); } return processorTracker; } @@ -37,7 +40,7 @@ public class ProcessorTrackerPool { public static synchronized List removeProcessorTracker(Long instanceId) { List res = Lists.newLinkedList(); - Map ttAddress2Pt = processorTrackerPool.remove(instanceId); + Map ttAddress2Pt = PROCESSOR_TRACKER_CONTAINER.remove(instanceId); if (ttAddress2Pt != null) { res.addAll(ttAddress2Pt.values()); ttAddress2Pt.clear(); diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/processor/ProcessorTracker.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/processor/ProcessorTracker.java index a81367cd..f3a54543 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/processor/ProcessorTracker.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/processor/ProcessorTracker.java @@ -6,7 +6,6 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.exception.ExceptionUtils; import org.springframework.util.CollectionUtils; -import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.RemoteConstant; import tech.powerjob.common.enums.ExecuteType; import tech.powerjob.common.enums.ProcessorType; @@ -17,12 +16,10 @@ import tech.powerjob.common.utils.CommonUtils; import tech.powerjob.worker.common.WorkerRuntime; import tech.powerjob.worker.common.constants.TaskStatus; import tech.powerjob.worker.common.utils.AkkaUtils; -import tech.powerjob.worker.common.utils.SpringUtils; -import tech.powerjob.worker.container.OmsContainer; -import tech.powerjob.worker.container.OmsContainerFactory; -import tech.powerjob.worker.core.ProcessorBeanFactory; -import 
tech.powerjob.worker.core.executor.ProcessorRunnable; -import tech.powerjob.worker.core.processor.sdk.BasicProcessor; +import tech.powerjob.worker.core.processor.ProcessorInfo; +import tech.powerjob.worker.core.processor.runnable.HeavyProcessorRunnable; +import tech.powerjob.worker.core.processor.ProcessorLoader; +import tech.powerjob.worker.core.tracker.manager.ProcessorTrackerManager; import tech.powerjob.worker.log.OmsLogger; import tech.powerjob.worker.log.OmsLoggerFactory; import tech.powerjob.worker.log.impl.OmsServerLogger; @@ -58,14 +55,8 @@ public class ProcessorTracker { * 冗余 instanceId,方便日志 */ private Long instanceId; - /** - * 任务执行器 - */ - private BasicProcessor processor; - /** - * 容器(可能为空) - */ - private OmsContainer omsContainer; + + private ProcessorInfo processorInfo; /** * 在线日志 */ @@ -129,8 +120,7 @@ public class ProcessorTracker { // 初始化定时任务 initTimingJob(); // 初始化 Processor - initProcessor(); - + processorInfo = ProcessorLoader.loadProcessor(workerRuntime, instanceInfo.getProcessorType(), instanceInfo.getProcessorInfo()); log.info("[ProcessorTracker-{}] ProcessorTracker was successfully created!", instanceId); } catch (Throwable t) { log.warn("[ProcessorTracker-{}] create ProcessorTracker failed, all tasks submitted here will fail.", instanceId, t); @@ -171,10 +161,10 @@ public class ProcessorTracker { newTask.setInstanceId(instanceInfo.getInstanceId()); newTask.setAddress(taskTrackerAddress); - ClassLoader classLoader = omsContainer == null ? 
getClass().getClassLoader() : omsContainer.getContainerClassLoader(); - ProcessorRunnable processorRunnable = new ProcessorRunnable(instanceInfo, taskTrackerActorRef, newTask, processor, omsLogger, classLoader, statusReportRetryQueue, workerRuntime); + ClassLoader classLoader = processorInfo.getClassLoader(); + HeavyProcessorRunnable heavyProcessorRunnable = new HeavyProcessorRunnable(instanceInfo, taskTrackerActorRef, newTask, processorInfo.getBasicProcessor(), omsLogger, classLoader, statusReportRetryQueue, workerRuntime); try { - threadPool.submit(processorRunnable); + threadPool.submit(heavyProcessorRunnable); success = true; } catch (RejectedExecutionException ignore) { log.warn("[ProcessorTracker-{}] submit task(taskId={},taskName={}) to ThreadPool failed due to ThreadPool has too much task waiting to process, this task will dispatch to other ProcessorTracker.", @@ -204,11 +194,6 @@ public class ProcessorTracker { */ public void destroy() { - // 0. 移除Container引用 - if (omsContainer != null) { - omsContainer.tryRelease(); - } - // 1. 关闭执行执行线程池 CommonUtils.executeIgnoreException(() -> { List tasks = threadPool.shutdownNow(); @@ -220,7 +205,7 @@ public class ProcessorTracker { // 2. 
去除顶层引用,送入GC世界 taskTrackerActorRef = null; statusReportRetryQueue.clear(); - ProcessorTrackerPool.removeProcessorTracker(instanceId); + ProcessorTrackerManager.removeProcessorTracker(instanceId); log.info("[ProcessorTracker-{}] ProcessorTracker destroyed successfully!", instanceId); @@ -326,52 +311,6 @@ public class ProcessorTracker { } - /** - * 初始化处理器 Processor - */ - private void initProcessor() throws Exception { - - ProcessorType processorType = ProcessorType.valueOf(instanceInfo.getProcessorType()); - String processorInfo = instanceInfo.getProcessorInfo(); - - switch (processorType) { - case BUILT_IN: - // 先使用 Spring 加载 - if (SpringUtils.supportSpringBean()) { - try { - processor = SpringUtils.getBean(processorInfo); - } catch (Exception e) { - log.warn("[ProcessorTracker-{}] no spring bean of processor(className={}), reason is {}.", instanceId, processorInfo, ExceptionUtils.getMessage(e)); - } - } - // 反射加载 - if (processor == null) { - processor = ProcessorBeanFactory.getInstance().getLocalProcessor(processorInfo); - } - break; - case EXTERNAL: - String[] split = processorInfo.split("#"); - log.info("[ProcessorTracker-{}] try to load processor({}) in container({})", instanceId, split[1], split[0]); - - String serverPath = AkkaUtils.getServerActorPath(workerRuntime.getServerDiscoveryService().getCurrentServerAddress()); - ActorSelection actorSelection = workerRuntime.getActorSystem().actorSelection(serverPath); - omsContainer = OmsContainerFactory.fetchContainer(Long.valueOf(split[0]), actorSelection); - if (omsContainer != null) { - processor = omsContainer.getProcessor(split[1]); - } else { - log.warn("[ProcessorTracker-{}] load container failed.", instanceId); - } - break; - default: - log.warn("[ProcessorTracker-{}] unknown processor type: {}.", instanceId, processorType); - throw new PowerJobException("unknown processor type of " + processorType); - } - - if (processor == null) { - log.warn("[ProcessorTracker-{}] fetch Processor(type={},info={}) failed.", 
instanceId, processorType, processorInfo); - throw new PowerJobException("fetch Processor failed, please check your processorType and processorInfo config"); - } - } /** * 计算线程池大小 diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/TaskTracker.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/TaskTracker.java index 492b8d7b..f3546db1 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/TaskTracker.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/TaskTracker.java @@ -1,57 +1,29 @@ package tech.powerjob.worker.core.tracker.task; import akka.actor.ActorSelection; -import com.fasterxml.jackson.core.type.TypeReference; -import lombok.AllArgsConstructor; -import tech.powerjob.common.enums.ExecuteType; +import akka.pattern.Patterns; +import com.google.common.collect.Maps; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.BeanUtils; import tech.powerjob.common.enums.InstanceStatus; -import tech.powerjob.common.RemoteConstant; -import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.model.InstanceDetail; import tech.powerjob.common.request.ServerScheduleJobReq; import tech.powerjob.common.request.TaskTrackerReportInstanceStatusReq; -import tech.powerjob.common.request.WorkerQueryExecutorClusterReq; import tech.powerjob.common.response.AskResponse; -import tech.powerjob.common.utils.CommonUtils; -import tech.powerjob.common.serialize.JsonUtils; -import tech.powerjob.common.utils.SegmentLock; import tech.powerjob.worker.common.WorkerRuntime; -import tech.powerjob.worker.common.constants.TaskConstant; -import tech.powerjob.worker.common.constants.TaskStatus; import tech.powerjob.worker.common.utils.AkkaUtils; -import tech.powerjob.worker.common.utils.WorkflowContextUtils; -import tech.powerjob.worker.core.ha.ProcessorTrackerStatusHolder; -import tech.powerjob.worker.persistence.TaskDO; -import 
tech.powerjob.worker.persistence.TaskPersistenceService; import tech.powerjob.worker.pojo.model.InstanceInfo; -import tech.powerjob.worker.pojo.request.ProcessorTrackerStatusReportReq; -import tech.powerjob.worker.pojo.request.TaskTrackerStartTaskReq; -import tech.powerjob.worker.pojo.request.TaskTrackerStopInstanceReq; -import com.google.common.base.Stopwatch; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import lombok.Data; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.BeanUtils; -import org.springframework.util.CollectionUtils; -import org.springframework.util.StringUtils; -import javax.annotation.Nullable; +import java.time.Duration; import java.util.Collections; -import java.util.List; import java.util.Map; -import java.util.Optional; -import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.CompletionStage; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; /** - * 负责管理 JobInstance 的运行,主要包括任务的派发(MR可能存在大量的任务)和状态的更新 - * - * @author tjq - * @since 2020/4/8 + * @author Echo009 + * @since 2022/9/19 */ @Slf4j public abstract class TaskTracker { @@ -60,10 +32,6 @@ public abstract class TaskTracker { * TaskTracker创建时间 */ protected final long createTime; - /** - * worker 运行时元数据 - */ - protected final WorkerRuntime workerRuntime; /** * 任务实例ID,使用频率过高,从 InstanceInfo 提取出来单独保存一份 */ @@ -72,22 +40,6 @@ public abstract class TaskTracker { * 任务实例信息 */ protected final InstanceInfo instanceInfo; - /** - * ProcessTracker 状态管理 - */ - protected final ProcessorTrackerStatusHolder ptStatusHolder; - /** - * 数据库持久化服务 - */ - protected final TaskPersistenceService taskPersistenceService; - /** - * 定时任务线程池 - */ - protected ScheduledExecutorService scheduledPool; - /** - * 是否结束 - */ - protected final AtomicBoolean finished; /** * 
追加的工作流上下文数据 * @@ -95,538 +47,46 @@ public abstract class TaskTracker { */ protected final Map appendedWfContext; /** - * 任务信息缓存 + * worker 运行时元数据 */ - private final Cache taskId2BriefInfo; - - + protected final WorkerRuntime workerRuntime; /** - * 分段锁 + * 是否结束 */ - private final SegmentLock segmentLock; - private static final int UPDATE_CONCURRENCY = 4; + protected final AtomicBoolean finished; + /** + * 连续上报多次失败后放弃上报,视为结果不可达,TaskTracker down + */ + protected int reportFailedCnt = 0; + + protected static final int MAX_REPORT_FAILED_THRESHOLD = 5; protected TaskTracker(ServerScheduleJobReq req, WorkerRuntime workerRuntime) { - - // 初始化成员变量 this.createTime = System.currentTimeMillis(); this.workerRuntime = workerRuntime; this.instanceId = req.getInstanceId(); + this.instanceInfo = new InstanceInfo(); BeanUtils.copyProperties(req, instanceInfo); // 特殊处理超时时间 if (instanceInfo.getInstanceTimeoutMS() <= 0) { - // Integer最大值:2147483647,一天的毫秒数:86400000;够执行24天了...要是不满足需求就让开发者手动指定吧 instanceInfo.setInstanceTimeoutMS(Integer.MAX_VALUE); } - // 赋予时间表达式类型 - instanceInfo.setTimeExpressionType(TimeExpressionType.valueOf(req.getTimeExpressionType()).getV()); - // 保护性操作 - instanceInfo.setThreadConcurrency(Math.max(1, instanceInfo.getThreadConcurrency())); - - this.ptStatusHolder = new ProcessorTrackerStatusHolder(instanceId, req.getMaxWorkerCount(), req.getAllWorkerAddress()); - this.taskPersistenceService = workerRuntime.getTaskPersistenceService(); - this.finished = new AtomicBoolean(false); // 只有工作流中的任务允许向工作流中追加上下文数据 this.appendedWfContext = req.getWfInstanceId() == null ? 
Collections.emptyMap() : Maps.newConcurrentMap(); - // 构建缓存 - taskId2BriefInfo = CacheBuilder.newBuilder().maximumSize(1024).build(); - - // 构建分段锁 - segmentLock = new SegmentLock(UPDATE_CONCURRENCY); - - // 子类自定义初始化操作 - initTaskTracker(req); - - log.info("[TaskTracker-{}] create TaskTracker successfully.", instanceId); + this.finished = new AtomicBoolean(false); } /** - * 静态方法创建 TaskTracker - * - * @param req 服务端调度任务请求 - * @return API/CRON -> CommonTaskTracker, FIX_RATE/FIX_DELAY -> FrequentTaskTracker + * 销毁 */ - public static TaskTracker create(ServerScheduleJobReq req, WorkerRuntime workerRuntime) { - try { - TimeExpressionType timeExpressionType = TimeExpressionType.valueOf(req.getTimeExpressionType()); - switch (timeExpressionType) { - case FIXED_RATE: - case FIXED_DELAY: - return new FrequentTaskTracker(req, workerRuntime); - default: - return new CommonTaskTracker(req, workerRuntime); - } - } catch (Exception e) { - log.warn("[TaskTracker-{}] create TaskTracker from request({}) failed.", req.getInstanceId(), req, e); - - // 直接发送失败请求 - TaskTrackerReportInstanceStatusReq response = new TaskTrackerReportInstanceStatusReq(); - BeanUtils.copyProperties(req, response); - response.setInstanceStatus(InstanceStatus.FAILED.getV()); - response.setResult(String.format("init TaskTracker failed, reason: %s", e.toString())); - response.setReportTime(System.currentTimeMillis()); - response.setStartTime(System.currentTimeMillis()); - response.setSourceAddress(workerRuntime.getWorkerAddress()); - - String serverPath = AkkaUtils.getServerActorPath(workerRuntime.getServerDiscoveryService().getCurrentServerAddress()); - ActorSelection serverActor = workerRuntime.getActorSystem().actorSelection(serverPath); - serverActor.tell(response, null); - } - return null; - } - - /* *************************** 对外方法区 *************************** */ + public abstract void destroy(); /** - * 更新追加的上下文数据 - * - * @param newAppendedWfContext 追加的上下文数据 - * @since 2021/02/05 + * 停止任务 */ - public void 
updateAppendedWfContext(Map newAppendedWfContext) { + public abstract void stopTask(); - // check - if (instanceInfo.getWfInstanceId() == null || CollectionUtils.isEmpty(newAppendedWfContext)) { - // 只有工作流中的任务才有存储的必要 - return; - } - // 检查追加的上下文大小是否超出限制 - if (WorkflowContextUtils.isExceededLengthLimit(appendedWfContext, workerRuntime.getWorkerConfig().getMaxAppendedWfContextLength())) { - log.warn("[TaskTracker-{}]current length of appended workflow context data is greater than {}, this appended workflow context data will be ignore!", instanceInfo.getInstanceId(), workerRuntime.getWorkerConfig().getMaxAppendedWfContextLength()); - // ignore appended workflow context data - return; - } - - for (Map.Entry entry : newAppendedWfContext.entrySet()) { - String originValue = appendedWfContext.put(entry.getKey(), entry.getValue()); - log.info("[TaskTracker-{}] update appended workflow context data {} : {} -> {}", instanceInfo.getInstanceId(), entry.getKey(), originValue, entry.getValue()); - } - - } - - - /** - * 更新Task状态 - * V1.0.0 -> V1.0.1(e405e283ad7f97b0b4e5d369c7de884c0caf9192) 锁方案变更,从 synchronized (taskId.intern()) 修改为分段锁,能大大减少内存占用,损失的只有理论并发度而已 - * - * @param subInstanceId 子任务实例ID - * @param taskId task的ID(task为任务实例的执行单位) - * @param newStatus task的新状态 - * @param reportTime 上报时间 - * @param result task的执行结果,未执行完成时为空 - */ - @SuppressWarnings({"squid:S3776", "squid:S2142"}) - public void updateTaskStatus(Long subInstanceId, String taskId, int newStatus, long reportTime, @Nullable String result) { - - if (finished.get()) { - return; - } - TaskStatus nTaskStatus = TaskStatus.of(newStatus); - - int lockId = taskId.hashCode(); - try { - - // 阻塞获取锁 - segmentLock.lockInterruptible(lockId); - TaskBriefInfo taskBriefInfo = taskId2BriefInfo.getIfPresent(taskId); - - // 缓存中不存在,从数据库查 - if (taskBriefInfo == null) { - Optional taskOpt = taskPersistenceService.getTask(instanceId, taskId); - if (taskOpt.isPresent()) { - TaskDO taskDO = taskOpt.get(); - taskBriefInfo = new 
TaskBriefInfo(taskId, TaskStatus.of(taskDO.getStatus()), taskDO.getLastReportTime()); - } else { - // 理论上不存在这种情况,除非数据库异常 - log.error("[TaskTracker-{}-{}] can't find task by taskId={}.", instanceId, subInstanceId, taskId); - taskBriefInfo = new TaskBriefInfo(taskId, TaskStatus.WAITING_DISPATCH, -1L); - } - // 写入缓存 - taskId2BriefInfo.put(taskId, taskBriefInfo); - } - - // 过滤过期的请求(潜在的集群时间一致性需求,重试跨 Worker 时,时间不一致可能导致问题) - if (taskBriefInfo.getLastReportTime() > reportTime) { - log.warn("[TaskTracker-{}-{}] receive expired(last {} > current {}) task status report(taskId={},newStatus={}), TaskTracker will drop this report.", - instanceId, subInstanceId, taskBriefInfo.getLastReportTime(), reportTime, taskId, newStatus); - return; - } - // 检查状态转移是否合法,fix issue 404 - if (nTaskStatus.getValue() < taskBriefInfo.getStatus().getValue()) { - log.warn("[TaskTracker-{}-{}] receive invalid task status report(taskId={},currentStatus={},newStatus={}), TaskTracker will drop this report.", - instanceId, subInstanceId, taskId, taskBriefInfo.getStatus().getValue(), newStatus); - return; - } - - // 此时本次请求已经有效,先更新相关信息 - taskBriefInfo.setLastReportTime(reportTime); - taskBriefInfo.setStatus(nTaskStatus); - - // 处理失败的情况 - int configTaskRetryNum = instanceInfo.getTaskRetryNum(); - if (nTaskStatus == TaskStatus.WORKER_PROCESS_FAILED && configTaskRetryNum >= 1) { - - // 失败不是主要的情况,多查一次数据库也问题不大(况且前面有缓存顶着,大部分情况之前不会去查DB) - Optional taskOpt = taskPersistenceService.getTask(instanceId, taskId); - // 查询DB再失败的话,就不重试了... - if (taskOpt.isPresent()) { - int failedCnt = taskOpt.get().getFailedCnt(); - if (failedCnt < configTaskRetryNum) { - - TaskDO updateEntity = new TaskDO(); - updateEntity.setFailedCnt(failedCnt + 1); - - /* - 地址规则: - 1. 当前存储的地址为任务派发的目的地(ProcessorTracker地址) - 2. 根任务、最终任务必须由TaskTracker所在机器执行(如果是根任务和最终任务,不应当修改地址) - 3. 
广播任务每台机器都需要执行,因此不应该重新分配worker(广播任务不应当修改地址) - */ - String taskName = taskOpt.get().getTaskName(); - ExecuteType executeType = ExecuteType.valueOf(instanceInfo.getExecuteType()); - if (!taskName.equals(TaskConstant.ROOT_TASK_NAME) && !taskName.equals(TaskConstant.LAST_TASK_NAME) && executeType != ExecuteType.BROADCAST) { - updateEntity.setAddress(RemoteConstant.EMPTY_ADDRESS); - } - - updateEntity.setStatus(TaskStatus.WAITING_DISPATCH.getValue()); - updateEntity.setLastReportTime(reportTime); - - boolean retryTask = taskPersistenceService.updateTask(instanceId, taskId, updateEntity); - if (retryTask) { - log.info("[TaskTracker-{}-{}] task(taskId={}) process failed, TaskTracker will have a retry.", instanceId, subInstanceId, taskId); - return; - } - } - } - } - - // 更新状态(失败重试写入DB失败的,也就不重试了...谁让你那么倒霉呢...) - result = result == null ? "" : result; - boolean updateResult = taskPersistenceService.updateTaskStatus(instanceId, taskId, newStatus, reportTime, result); - - if (!updateResult) { - log.warn("[TaskTracker-{}-{}] update task status failed, this task(taskId={}) may be processed repeatedly!", instanceId, subInstanceId, taskId); - } - - } catch (InterruptedException ignore) { - // ignore - } catch (Exception e) { - log.warn("[TaskTracker-{}-{}] update task status failed.", instanceId, subInstanceId, e); - } finally { - segmentLock.unlock(lockId); - } - } - - /** - * 提交Task任务(MapReduce的Map,Broadcast的广播),上层保证 batchSize,同时插入过多数据可能导致失败 - * - * @param newTaskList 新增的子任务列表 - */ - public boolean submitTask(List newTaskList) { - if (finished.get()) { - return true; - } - if (CollectionUtils.isEmpty(newTaskList)) { - return true; - } - // 基础处理(多循环一次虽然有些浪费,但分布式执行中,这点耗时绝不是主要占比,忽略不计!) 
- newTaskList.forEach(task -> { - task.setInstanceId(instanceId); - task.setStatus(TaskStatus.WAITING_DISPATCH.getValue()); - task.setFailedCnt(0); - task.setLastModifiedTime(System.currentTimeMillis()); - task.setCreatedTime(System.currentTimeMillis()); - task.setLastReportTime(-1L); - }); - - log.debug("[TaskTracker-{}] receive new tasks: {}", instanceId, newTaskList); - return taskPersistenceService.batchSave(newTaskList); - } - - /** - * 处理 ProcessorTracker 的心跳信息 - * - * @param heartbeatReq ProcessorTracker(任务的执行管理器)发来的心跳包,包含了其当前状态 - */ - public void receiveProcessorTrackerHeartbeat(ProcessorTrackerStatusReportReq heartbeatReq) { - log.debug("[TaskTracker-{}] receive heartbeat: {}", instanceId, heartbeatReq); - ptStatusHolder.updateStatus(heartbeatReq); - - // 上报空闲,检查是否已经接收到全部该 ProcessorTracker 负责的任务 - if (heartbeatReq.getType() == ProcessorTrackerStatusReportReq.IDLE) { - String idlePtAddress = heartbeatReq.getAddress(); - // 该 ProcessorTracker 已销毁,重置为初始状态 - ptStatusHolder.getProcessorTrackerStatus(idlePtAddress).setDispatched(false); - List unfinishedTask = taskPersistenceService.getAllUnFinishedTaskByAddress(instanceId, idlePtAddress); - if (!CollectionUtils.isEmpty(unfinishedTask)) { - log.warn("[TaskTracker-{}] ProcessorTracker({}) is idle now but have unfinished tasks: {}", instanceId, idlePtAddress, unfinishedTask); - unfinishedTask.forEach(task -> updateTaskStatus(task.getSubInstanceId(), task.getTaskId(), TaskStatus.WORKER_PROCESS_FAILED.getValue(), System.currentTimeMillis(), "SYSTEM: unreceived process result")); - } - } - } - - /** - * 生成广播任务 - * - * @param preExecuteSuccess 预执行广播任务运行状态 - * @param subInstanceId 子实例ID - * @param preTaskId 预执行广播任务的taskId - * @param result 预执行广播任务的结果 - */ - public void broadcast(boolean preExecuteSuccess, long subInstanceId, String preTaskId, String result) { - - if (finished.get()) { - return; - } - - log.info("[TaskTracker-{}-{}] finished broadcast's preProcess, preExecuteSuccess:{},preTaskId:{},result:{}", 
instanceId, subInstanceId, preExecuteSuccess, preTaskId, result); - - // 生成集群子任务 - if (preExecuteSuccess) { - List allWorkerAddress = ptStatusHolder.getAllProcessorTrackers(); - List subTaskList = Lists.newLinkedList(); - for (int i = 0; i < allWorkerAddress.size(); i++) { - TaskDO subTask = new TaskDO(); - subTask.setSubInstanceId(subInstanceId); - subTask.setTaskName(TaskConstant.BROADCAST_TASK_NAME); - subTask.setTaskId(preTaskId + "." + i); - // 广播任务直接写入派发地址 - subTask.setAddress(allWorkerAddress.get(i)); - subTaskList.add(subTask); - } - submitTask(subTaskList); - } else { - log.warn("[TaskTracker-{}-{}] BroadcastTask failed because of preProcess failed, preProcess result={}.", instanceId, subInstanceId, result); - } - } - - /** - * 销毁自身,释放资源 - */ - public void destroy() { - - finished.set(true); - - Stopwatch sw = Stopwatch.createStarted(); - // 0. 开始关闭线程池,不能使用 shutdownNow(),因为 destroy 方法本身就在 scheduledPool 的线程中执行,强行关闭会打断 destroy 的执行。 - scheduledPool.shutdown(); - - // 1. 通知 ProcessorTracker 释放资源 - TaskTrackerStopInstanceReq stopRequest = new TaskTrackerStopInstanceReq(); - stopRequest.setInstanceId(instanceId); - ptStatusHolder.getAllProcessorTrackers().forEach(ptIP -> { - String ptPath = AkkaUtils.getAkkaWorkerPath(ptIP, RemoteConstant.PROCESSOR_TRACKER_ACTOR_NAME); - ActorSelection ptActor = workerRuntime.getActorSystem().actorSelection(ptPath); - // 不可靠通知,ProcessorTracker 也可以靠自己的定时任务/问询等方式关闭 - ptActor.tell(stopRequest, null); - }); - - // 2. 删除所有数据库数据 - boolean dbSuccess = taskPersistenceService.deleteAllTasks(instanceId); - if (!dbSuccess) { - log.error("[TaskTracker-{}] delete tasks from database failed.", instanceId); - } else { - log.debug("[TaskTracker-{}] delete all tasks from database successfully.", instanceId); - } - - // 3. 移除顶层引用,送去 GC - TaskTrackerPool.remove(instanceId); - - log.info("[TaskTracker-{}] TaskTracker has left the world(using {}), bye~", instanceId, sw.stop()); - - // 4. 
强制关闭线程池 - if (!scheduledPool.isTerminated()) { - CommonUtils.executeIgnoreException(() -> scheduledPool.shutdownNow()); - } - - } - - /* *************************** 对内方法区 *************************** */ - - /** - * 派发任务到 ProcessorTracker - * - * @param task 需要被执行的任务 - * @param processorTrackerAddress ProcessorTracker的地址(IP:Port) - */ - protected void dispatchTask(TaskDO task, String processorTrackerAddress) { - - // 1. 持久化,更新数据库(如果更新数据库失败,可能导致重复执行,先不处理) - TaskDO updateEntity = new TaskDO(); - updateEntity.setStatus(TaskStatus.DISPATCH_SUCCESS_WORKER_UNCHECK.getValue()); - // 写入处理该任务的 ProcessorTracker - updateEntity.setAddress(processorTrackerAddress); - boolean success = taskPersistenceService.updateTask(instanceId, task.getTaskId(), updateEntity); - if (!success) { - log.warn("[TaskTracker-{}] dispatch task(taskId={},taskName={}) failed due to update task status failed.", instanceId, task.getTaskId(), task.getTaskName()); - return; - } - - // 2. 更新 ProcessorTrackerStatus 状态 - ptStatusHolder.getProcessorTrackerStatus(processorTrackerAddress).setDispatched(true); - // 3. 初始化缓存 - taskId2BriefInfo.put(task.getTaskId(), new TaskBriefInfo(task.getTaskId(), TaskStatus.DISPATCH_SUCCESS_WORKER_UNCHECK, -1L)); - - // 4. 
任务派发 - TaskTrackerStartTaskReq startTaskReq = new TaskTrackerStartTaskReq(instanceInfo, task, workerRuntime.getWorkerAddress()); - String ptActorPath = AkkaUtils.getAkkaWorkerPath(processorTrackerAddress, RemoteConstant.PROCESSOR_TRACKER_ACTOR_NAME); - ActorSelection ptActor = workerRuntime.getActorSystem().actorSelection(ptActorPath); - ptActor.tell(startTaskReq, null); - - log.debug("[TaskTracker-{}] dispatch task(taskId={},taskName={}) successfully.", instanceId, task.getTaskId(), task.getTaskName()); - } - - /** - * 获取任务实例产生的各个Task状态,用于分析任务实例执行情况 - * - * @param subInstanceId 子任务实例ID - * @return InstanceStatisticsHolder - */ - protected InstanceStatisticsHolder getInstanceStatisticsHolder(long subInstanceId) { - - Map status2Num = taskPersistenceService.getTaskStatusStatistics(instanceId, subInstanceId); - InstanceStatisticsHolder holder = new InstanceStatisticsHolder(); - - holder.waitingDispatchNum = status2Num.getOrDefault(TaskStatus.WAITING_DISPATCH, 0L); - holder.workerUnreceivedNum = status2Num.getOrDefault(TaskStatus.DISPATCH_SUCCESS_WORKER_UNCHECK, 0L); - holder.receivedNum = status2Num.getOrDefault(TaskStatus.WORKER_RECEIVED, 0L); - holder.runningNum = status2Num.getOrDefault(TaskStatus.WORKER_PROCESSING, 0L); - holder.failedNum = status2Num.getOrDefault(TaskStatus.WORKER_PROCESS_FAILED, 0L); - holder.succeedNum = status2Num.getOrDefault(TaskStatus.WORKER_PROCESS_SUCCESS, 0L); - return holder; - } - - - /** - * 定时扫描数据库中的task(出于内存占用量考虑,每次最多获取100个),并将需要执行的任务派发出去 - */ - protected class Dispatcher implements Runnable { - - // 数据库查询限制,每次最多查询几个任务 - private static final int DB_QUERY_LIMIT = 100; - - @Override - public void run() { - - if (finished.get()) { - return; - } - - Stopwatch stopwatch = Stopwatch.createStarted(); - - // 1. 获取可以派发任务的 ProcessorTracker - List availablePtIps = ptStatusHolder.getAvailableProcessorTrackers(); - - // 2. 
没有可用 ProcessorTracker,本次不派发 - if (availablePtIps.isEmpty()) { - log.debug("[TaskTracker-{}] no available ProcessorTracker now.", instanceId); - return; - } - - // 3. 避免大查询,分批派发任务 - long currentDispatchNum = 0; - long maxDispatchNum = availablePtIps.size() * instanceInfo.getThreadConcurrency() * 2L; - AtomicInteger index = new AtomicInteger(0); - - // 4. 循环查询数据库,获取需要派发的任务 - while (maxDispatchNum > currentDispatchNum) { - - int dbQueryLimit = Math.min(DB_QUERY_LIMIT, (int) maxDispatchNum); - List needDispatchTasks = taskPersistenceService.getTaskByStatus(instanceId, TaskStatus.WAITING_DISPATCH, dbQueryLimit); - currentDispatchNum += needDispatchTasks.size(); - - needDispatchTasks.forEach(task -> { - // 获取 ProcessorTracker 地址,如果 Task 中自带了 Address,则使用该 Address - String ptAddress = task.getAddress(); - if (StringUtils.isEmpty(ptAddress) || RemoteConstant.EMPTY_ADDRESS.equals(ptAddress)) { - ptAddress = availablePtIps.get(index.getAndIncrement() % availablePtIps.size()); - } - dispatchTask(task, ptAddress); - }); - - // 数量不足 或 查询失败,则终止循环 - if (needDispatchTasks.size() < dbQueryLimit) { - break; - } - } - - log.debug("[TaskTracker-{}] dispatched {} tasks,using time {}.", instanceId, currentDispatchNum, stopwatch.stop()); - } - } - - /** - * 执行器动态上线(for 秒级任务和 MR 任务) - * 原则:server 查询得到的 执行器状态不会干预 worker 自己维护的状态,即只做新增,不做任何修改 - */ - protected class WorkerDetector implements Runnable { - @Override - public void run() { - - boolean needMoreWorker = ptStatusHolder.checkNeedMoreWorker(); - log.info("[TaskTracker-{}] checkNeedMoreWorker: {}", instanceId, needMoreWorker); - if (!needMoreWorker) { - return; - } - - String serverPath = AkkaUtils.getServerActorPath(workerRuntime.getServerDiscoveryService().getCurrentServerAddress()); - if (StringUtils.isEmpty(serverPath)) { - log.warn("[TaskTracker-{}] no server available, won't start worker detective!", instanceId); - return; - } - WorkerQueryExecutorClusterReq req = new WorkerQueryExecutorClusterReq(workerRuntime.getAppId(), 
instanceInfo.getJobId()); - AskResponse response = AkkaUtils.easyAsk(workerRuntime.getActorSystem().actorSelection(serverPath), req); - if (!response.isSuccess()) { - log.warn("[TaskTracker-{}] detective failed due to ask failed, message is {}", instanceId, response.getMessage()); - return; - } - try { - List workerList = JsonUtils.parseObject(response.getData(), new TypeReference>() {}); - ptStatusHolder.register(workerList); - } catch (Exception e) { - log.warn("[TaskTracker-{}] detective failed!", instanceId, e); - } - } - } - - @Data - @AllArgsConstructor - protected static class TaskBriefInfo { - - private String id; - - private TaskStatus status; - - private Long lastReportTime; - } - - /** - * 存储任务实例产生的各个Task状态,用于分析任务实例执行情况 - */ - @Data - protected static class InstanceStatisticsHolder { - // 等待派发状态(仅存在 TaskTracker 数据库中) - protected long waitingDispatchNum; - // 已派发,但 ProcessorTracker 未确认,可能由于网络错误请求未送达,也有可能 ProcessorTracker 线程池满,拒绝执行 - protected long workerUnreceivedNum; - // ProcessorTracker确认接收,存在与线程池队列中,排队执行 - protected long receivedNum; - // ProcessorTracker正在执行 - protected long runningNum; - protected long failedNum; - protected long succeedNum; - - public long getTotalTaskNum() { - return waitingDispatchNum + workerUnreceivedNum + receivedNum + runningNum + failedNum + succeedNum; - } - } - - /** - * 初始化 TaskTracker - * - * @param req 服务器调度任务实例运行请求 - */ - protected abstract void initTaskTracker(ServerScheduleJobReq req); /** * 查询任务实例的详细运行状态 @@ -634,4 +94,42 @@ public abstract class TaskTracker { * @return 任务实例的详细运行状态 */ public abstract InstanceDetail fetchRunningStatus(); + + + public static void reportCreateErrorToServer(ServerScheduleJobReq req, WorkerRuntime workerRuntime, Exception e) { + log.warn("[TaskTracker-{}] create TaskTracker from request({}) failed.", req.getInstanceId(), req, e); + // 直接发送失败请求 + TaskTrackerReportInstanceStatusReq response = new TaskTrackerReportInstanceStatusReq(); + BeanUtils.copyProperties(req, response); + 
response.setInstanceStatus(InstanceStatus.FAILED.getV()); + response.setResult(String.format("init TaskTracker failed, reason: %s", e.toString())); + response.setReportTime(System.currentTimeMillis()); + response.setStartTime(System.currentTimeMillis()); + response.setSourceAddress(workerRuntime.getWorkerAddress()); + + String serverPath = AkkaUtils.getServerActorPath(workerRuntime.getServerDiscoveryService().getCurrentServerAddress()); + ActorSelection serverActor = workerRuntime.getActorSystem().actorSelection(serverPath); + serverActor.tell(response, null); + } + + protected void reportFinalStatusThenDestroy(ActorSelection serverActor, TaskTrackerReportInstanceStatusReq reportInstanceStatusReq) { + // 最终状态需要可靠上报 + CompletionStage ask = Patterns.ask(serverActor, reportInstanceStatusReq, Duration.ofSeconds(15)); + boolean serverAccepted = false; + try { + AskResponse askResponse = (AskResponse) ask.toCompletableFuture().get(15, TimeUnit.SECONDS); + serverAccepted = askResponse.isSuccess(); + } catch (Exception e) { + log.warn("[TaskTracker-{}] report finished status failed, req={}.", instanceId, reportInstanceStatusReq, e); + } + if (!serverAccepted) { + if (++reportFailedCnt > MAX_REPORT_FAILED_THRESHOLD) { + log.error("[TaskTracker-{}] try to report finished status(detail={}) lots of times but all failed, it's time to give up, so the process result will be dropped", instanceId, reportInstanceStatusReq); + destroy(); + } + return; + } + log.info("[TaskTracker-{}] report finished status(detail={}) success", instanceId, reportInstanceStatusReq); + destroy(); + } } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/TaskTrackerPool.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/TaskTrackerPool.java deleted file mode 100644 index 9a967dfe..00000000 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/TaskTrackerPool.java +++ /dev/null @@ -1,45 +0,0 @@ -package 
tech.powerjob.worker.core.tracker.task; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import java.util.List; -import java.util.Map; -import java.util.function.Function; - -/** - * 持有 TaskTracker 对象 - * - * @author tjq - * @since 2020/3/24 - */ -public class TaskTrackerPool { - - private static final Map instanceId2TaskTracker = Maps.newConcurrentMap(); - - /** - * 获取 TaskTracker - */ - public static TaskTracker getTaskTrackerPool(Long instanceId) { - return instanceId2TaskTracker.get(instanceId); - } - - public static TaskTracker remove(Long instanceId) { - return instanceId2TaskTracker.remove(instanceId); - } - - public static void atomicCreateTaskTracker(Long instanceId, Function creator) { - instanceId2TaskTracker.computeIfAbsent(instanceId, creator); - } - - public static List getAllFrequentTaskTrackerKeys() { - List keys = Lists.newLinkedList(); - instanceId2TaskTracker.forEach((key, tk) -> { - if (tk instanceof FrequentTaskTracker) { - keys.add(key); - } - }); - return keys; - } - -} diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/CommonTaskTracker.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/heavy/CommonTaskTracker.java similarity index 86% rename from powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/CommonTaskTracker.java rename to powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/heavy/CommonTaskTracker.java index 1cf442b5..ef9c2e0a 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/CommonTaskTracker.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/heavy/CommonTaskTracker.java @@ -1,4 +1,4 @@ -package tech.powerjob.worker.core.tracker.task; +package tech.powerjob.worker.core.tracker.task.heavy; import akka.actor.ActorSelection; import akka.pattern.Patterns; @@ -8,11 +8,11 @@ import lombok.ToString; import lombok.extern.slf4j.Slf4j; import 
org.springframework.util.CollectionUtils; import tech.powerjob.common.PowerJobDKey; -import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.RemoteConstant; import tech.powerjob.common.SystemInstanceResult; import tech.powerjob.common.enums.ExecuteType; import tech.powerjob.common.enums.InstanceStatus; +import tech.powerjob.common.exception.PowerJobException; import tech.powerjob.common.model.InstanceDetail; import tech.powerjob.common.request.ServerScheduleJobReq; import tech.powerjob.common.request.TaskTrackerReportInstanceStatusReq; @@ -39,12 +39,8 @@ import java.util.concurrent.TimeUnit; */ @Slf4j @ToString -public class CommonTaskTracker extends TaskTracker { +public class CommonTaskTracker extends HeavyTaskTracker { - /** - * 连续上报多次失败后放弃上报,视为结果不可达,TaskTracker down - */ - private int reportFailedCnt = 0; /** * 根任务 ID */ @@ -55,8 +51,6 @@ public class CommonTaskTracker extends TaskTracker { */ public static final String LAST_TASK_ID = "9999"; - private static final int MAX_REPORT_FAILED_THRESHOLD = 5; - protected CommonTaskTracker(ServerScheduleJobReq req, WorkerRuntime workerRuntime) { super(req, workerRuntime); } @@ -107,12 +101,7 @@ public class CommonTaskTracker extends TaskTracker { } - /** - * 任务是否超时 - */ - public boolean isTimeout() { - return System.currentTimeMillis() - createTime > instanceInfo.getInstanceTimeoutMS(); - } + /** * 持久化根任务,只有完成持久化才能视为任务开始running(先持久化,再报告server) @@ -133,7 +122,7 @@ public class CommonTaskTracker extends TaskTracker { if (taskPersistenceService.save(rootTask)) { log.info("[TaskTracker-{}] create root task successfully.", instanceId); - }else { + } else { log.error("[TaskTracker-{}] create root task failed.", instanceId); throw new PowerJobException("create root task failed for instance: " + instanceId); } @@ -179,7 +168,7 @@ public class CommonTaskTracker extends TaskTracker { if (finishedNum == 0) { finished.set(true); result = SystemInstanceResult.TASK_INIT_FAILED; - }else { + } else { 
ExecuteType executeType = ExecuteType.valueOf(instanceInfo.getExecuteType()); switch (executeType) { @@ -192,7 +181,7 @@ public class CommonTaskTracker extends TaskTracker { success = false; result = SystemInstanceResult.UNKNOWN_BUG; log.warn("[TaskTracker-{}] there must have some bug in TaskTracker.", instanceId); - }else { + } else { result = allTask.get(0).getResult(); success = allTask.get(0).getStatus() == TaskStatus.WORKER_PROCESS_SUCCESS.getValue(); } @@ -219,7 +208,7 @@ public class CommonTaskTracker extends TaskTracker { result = resultTask.getResult(); } - }else { + } else { // 不存在,代表前置任务刚刚执行完毕,需要创建 lastTask,最终任务必须在本机执行! TaskDO newLastTask = new TaskDO(); @@ -245,35 +234,11 @@ public class CommonTaskTracker extends TaskTracker { // 4. 执行完毕,报告服务器 if (finished.get()) { - req.setResult(result); // 上报追加的工作流上下文信息 req.setAppendedWfContext(appendedWfContext); req.setInstanceStatus(success ? InstanceStatus.SUCCEED.getV() : InstanceStatus.FAILED.getV()); - - CompletionStage askCS = Patterns.ask(serverActor, req, Duration.ofMillis(RemoteConstant.DEFAULT_TIMEOUT_MS)); - - boolean serverAccepted = false; - try { - AskResponse askResponse = (AskResponse) askCS.toCompletableFuture().get(RemoteConstant.DEFAULT_TIMEOUT_MS, TimeUnit.MILLISECONDS); - serverAccepted = askResponse.isSuccess(); - }catch (Exception e) { - log.warn("[TaskTracker-{}] report finished status failed, result={}.", instanceId, result, e); - } - - // 服务器未接受上报,则等待下次重新上报 - if (!serverAccepted) { - if (++reportFailedCnt > MAX_REPORT_FAILED_THRESHOLD) { - log.error("[TaskTracker-{}] try to report finished status(success={}, result={}) lots of times but all failed, it's time to give up, so the process result will be dropped", instanceId, success, result); - destroy(); - } - return; - } - - // 服务器已经更新状态,任务已经执行完毕,开始释放所有资源 - log.info("[TaskTracker-{}] instance process finished,result = {}, start to release resource...", instanceId, result); - - destroy(); + reportFinalStatusThenDestroy(serverActor,req); 
return; } @@ -318,11 +283,21 @@ public class CommonTaskTracker extends TaskTracker { } } + /** + * 任务是否超时 + */ + public boolean isTimeout() { + if (instanceInfo.getInstanceTimeoutMS() > 0) { + return System.currentTimeMillis() - createTime > instanceInfo.getInstanceTimeoutMS(); + } + return false; + } + @Override public void run() { try { innerRun(); - }catch (Exception e) { + } catch (Exception e) { log.warn("[TaskTracker-{}] status checker execute failed, please fix the bug (@tjq)!", instanceId, e); } } diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/FrequentTaskTracker.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/heavy/FrequentTaskTracker.java similarity index 99% rename from powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/FrequentTaskTracker.java rename to powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/heavy/FrequentTaskTracker.java index 071477d5..f46ea3de 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/FrequentTaskTracker.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/heavy/FrequentTaskTracker.java @@ -1,4 +1,4 @@ -package tech.powerjob.worker.core.tracker.task; +package tech.powerjob.worker.core.tracker.task.heavy; import akka.actor.ActorSelection; import com.fasterxml.jackson.core.JsonProcessingException; @@ -8,8 +8,8 @@ import com.google.common.collect.Maps; import com.google.common.util.concurrent.ThreadFactoryBuilder; import lombok.Data; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; import org.springframework.beans.BeanUtils; -import org.springframework.util.StringUtils; import tech.powerjob.common.enums.ExecuteType; import tech.powerjob.common.enums.InstanceStatus; import tech.powerjob.common.enums.TimeExpressionType; @@ -41,7 +41,7 @@ import java.util.concurrent.atomic.AtomicLong; * @since 2020/4/8 */ @Slf4j -public class FrequentTaskTracker 
extends TaskTracker { +public class FrequentTaskTracker extends HeavyTaskTracker { /** * 时间表达式类型 @@ -272,6 +272,7 @@ public class FrequentTaskTracker extends TaskTracker { long executeTimeout = nowTS - timeHolder.startTime; // 超时(包含总运行时间超时和心跳包超时),直接判定为失败 + if (executeTimeout > instanceTimeoutMS) { onFinished(subInstanceId, false, "RUNNING_TIMEOUT", iterator); continue; diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/heavy/HeavyTaskTracker.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/heavy/HeavyTaskTracker.java new file mode 100644 index 00000000..ab459392 --- /dev/null +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/heavy/HeavyTaskTracker.java @@ -0,0 +1,585 @@ +package tech.powerjob.worker.core.tracker.task.heavy; + +import akka.actor.ActorSelection; +import com.fasterxml.jackson.core.type.TypeReference; +import lombok.AllArgsConstructor; +import tech.powerjob.common.enums.ExecuteType; +import tech.powerjob.common.enums.InstanceStatus; +import tech.powerjob.common.RemoteConstant; +import tech.powerjob.common.enums.TimeExpressionType; +import tech.powerjob.common.model.InstanceDetail; +import tech.powerjob.common.request.ServerScheduleJobReq; +import tech.powerjob.common.request.TaskTrackerReportInstanceStatusReq; +import tech.powerjob.common.request.WorkerQueryExecutorClusterReq; +import tech.powerjob.common.response.AskResponse; +import tech.powerjob.common.utils.CommonUtils; +import tech.powerjob.common.serialize.JsonUtils; +import tech.powerjob.common.utils.SegmentLock; +import tech.powerjob.worker.common.WorkerRuntime; +import tech.powerjob.worker.common.constants.TaskConstant; +import tech.powerjob.worker.common.constants.TaskStatus; +import tech.powerjob.worker.common.utils.AkkaUtils; +import tech.powerjob.worker.common.utils.WorkflowContextUtils; +import tech.powerjob.worker.core.ha.ProcessorTrackerStatusHolder; +import 
tech.powerjob.worker.core.tracker.manager.HeavyTaskTrackerManager; +import tech.powerjob.worker.core.tracker.task.TaskTracker; +import tech.powerjob.worker.persistence.TaskDO; +import tech.powerjob.worker.persistence.TaskPersistenceService; +import tech.powerjob.worker.pojo.request.ProcessorTrackerStatusReportReq; +import tech.powerjob.worker.pojo.request.TaskTrackerStartTaskReq; +import tech.powerjob.worker.pojo.request.TaskTrackerStopInstanceReq; +import com.google.common.base.Stopwatch; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.BeanUtils; +import org.springframework.util.CollectionUtils; +import org.springframework.util.StringUtils; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * 负责管理 JobInstance 的运行,主要包括任务的派发(MR可能存在大量的任务)和状态的更新 + * + * @author tjq + * @since 2020/4/8 + */ +@Slf4j +public abstract class HeavyTaskTracker extends TaskTracker { + + /** + * ProcessTracker 状态管理 + */ + protected final ProcessorTrackerStatusHolder ptStatusHolder; + /** + * 数据库持久化服务 + */ + protected final TaskPersistenceService taskPersistenceService; + /** + * 定时任务线程池 + */ + protected ScheduledExecutorService scheduledPool; + /** + * 任务信息缓存 + */ + private final Cache taskId2BriefInfo; + + + /** + * 分段锁 + */ + private final SegmentLock segmentLock; + private static final int UPDATE_CONCURRENCY = 4; + + protected HeavyTaskTracker(ServerScheduleJobReq req, WorkerRuntime workerRuntime) { + // 初始化成员变量 + super(req,workerRuntime); + // 赋予时间表达式类型 + instanceInfo.setTimeExpressionType(TimeExpressionType.valueOf(req.getTimeExpressionType()).getV()); + // 保护性操作 + 
instanceInfo.setThreadConcurrency(Math.max(1, instanceInfo.getThreadConcurrency())); + this.ptStatusHolder = new ProcessorTrackerStatusHolder(instanceId, req.getMaxWorkerCount(), req.getAllWorkerAddress()); + this.taskPersistenceService = workerRuntime.getTaskPersistenceService(); + // 构建缓存 + taskId2BriefInfo = CacheBuilder.newBuilder().maximumSize(1024).build(); + + // 构建分段锁 + segmentLock = new SegmentLock(UPDATE_CONCURRENCY); + + // 子类自定义初始化操作 + initTaskTracker(req); + + log.info("[TaskTracker-{}] create TaskTracker successfully.", instanceId); + } + + /** + * 静态方法创建 TaskTracker + * + * @param req 服务端调度任务请求 + * @return API/CRON -> CommonTaskTracker, FIX_RATE/FIX_DELAY -> FrequentTaskTracker + */ + public static HeavyTaskTracker create(ServerScheduleJobReq req, WorkerRuntime workerRuntime) { + try { + TimeExpressionType timeExpressionType = TimeExpressionType.valueOf(req.getTimeExpressionType()); + switch (timeExpressionType) { + case FIXED_RATE: + case FIXED_DELAY: + return new FrequentTaskTracker(req, workerRuntime); + default: + return new CommonTaskTracker(req, workerRuntime); + } + } catch (Exception e) { + reportCreateErrorToServer(req, workerRuntime, e); + } + return null; + } + + + + /* *************************** 对外方法区 *************************** */ + + /** + * 更新追加的上下文数据 + * + * @param newAppendedWfContext 追加的上下文数据 + * @since 2021/02/05 + */ + public void updateAppendedWfContext(Map newAppendedWfContext) { + + // check + if (instanceInfo.getWfInstanceId() == null || CollectionUtils.isEmpty(newAppendedWfContext)) { + // 只有工作流中的任务才有存储的必要 + return; + } + // 检查追加的上下文大小是否超出限制 + if (WorkflowContextUtils.isExceededLengthLimit(appendedWfContext, workerRuntime.getWorkerConfig().getMaxAppendedWfContextLength())) { + log.warn("[TaskTracker-{}]current length of appended workflow context data is greater than {}, this appended workflow context data will be ignore!", instanceInfo.getInstanceId(), workerRuntime.getWorkerConfig().getMaxAppendedWfContextLength()); + // 
ignore appended workflow context data + return; + } + + for (Map.Entry entry : newAppendedWfContext.entrySet()) { + String originValue = appendedWfContext.put(entry.getKey(), entry.getValue()); + log.info("[TaskTracker-{}] update appended workflow context data {} : {} -> {}", instanceInfo.getInstanceId(), entry.getKey(), originValue, entry.getValue()); + } + + } + + + /** + * 更新Task状态 + * V1.0.0 -> V1.0.1(e405e283ad7f97b0b4e5d369c7de884c0caf9192) 锁方案变更,从 synchronized (taskId.intern()) 修改为分段锁,能大大减少内存占用,损失的只有理论并发度而已 + * + * @param subInstanceId 子任务实例ID + * @param taskId task的ID(task为任务实例的执行单位) + * @param newStatus task的新状态 + * @param reportTime 上报时间 + * @param result task的执行结果,未执行完成时为空 + */ + @SuppressWarnings({"squid:S3776", "squid:S2142"}) + public void updateTaskStatus(Long subInstanceId, String taskId, int newStatus, long reportTime, @Nullable String result) { + + if (finished.get()) { + return; + } + TaskStatus nTaskStatus = TaskStatus.of(newStatus); + + int lockId = taskId.hashCode(); + try { + + // 阻塞获取锁 + segmentLock.lockInterruptible(lockId); + TaskBriefInfo taskBriefInfo = taskId2BriefInfo.getIfPresent(taskId); + + // 缓存中不存在,从数据库查 + if (taskBriefInfo == null) { + Optional taskOpt = taskPersistenceService.getTask(instanceId, taskId); + if (taskOpt.isPresent()) { + TaskDO taskDO = taskOpt.get(); + taskBriefInfo = new TaskBriefInfo(taskId, TaskStatus.of(taskDO.getStatus()), taskDO.getLastReportTime()); + } else { + // 理论上不存在这种情况,除非数据库异常 + log.error("[TaskTracker-{}-{}] can't find task by taskId={}.", instanceId, subInstanceId, taskId); + taskBriefInfo = new TaskBriefInfo(taskId, TaskStatus.WAITING_DISPATCH, -1L); + } + // 写入缓存 + taskId2BriefInfo.put(taskId, taskBriefInfo); + } + + // 过滤过期的请求(潜在的集群时间一致性需求,重试跨 Worker 时,时间不一致可能导致问题) + if (taskBriefInfo.getLastReportTime() > reportTime) { + log.warn("[TaskTracker-{}-{}] receive expired(last {} > current {}) task status report(taskId={},newStatus={}), TaskTracker will drop this report.", + instanceId, 
subInstanceId, taskBriefInfo.getLastReportTime(), reportTime, taskId, newStatus); + return; + } + // 检查状态转移是否合法,fix issue 404 + if (nTaskStatus.getValue() < taskBriefInfo.getStatus().getValue()) { + log.warn("[TaskTracker-{}-{}] receive invalid task status report(taskId={},currentStatus={},newStatus={}), TaskTracker will drop this report.", + instanceId, subInstanceId, taskId, taskBriefInfo.getStatus().getValue(), newStatus); + return; + } + + // 此时本次请求已经有效,先更新相关信息 + taskBriefInfo.setLastReportTime(reportTime); + taskBriefInfo.setStatus(nTaskStatus); + + // 处理失败的情况 + int configTaskRetryNum = instanceInfo.getTaskRetryNum(); + if (nTaskStatus == TaskStatus.WORKER_PROCESS_FAILED && configTaskRetryNum >= 1) { + + // 失败不是主要的情况,多查一次数据库也问题不大(况且前面有缓存顶着,大部分情况之前不会去查DB) + Optional taskOpt = taskPersistenceService.getTask(instanceId, taskId); + // 查询DB再失败的话,就不重试了... + if (taskOpt.isPresent()) { + int failedCnt = taskOpt.get().getFailedCnt(); + if (failedCnt < configTaskRetryNum) { + + TaskDO updateEntity = new TaskDO(); + updateEntity.setFailedCnt(failedCnt + 1); + + /* + 地址规则: + 1. 当前存储的地址为任务派发的目的地(ProcessorTracker地址) + 2. 根任务、最终任务必须由TaskTracker所在机器执行(如果是根任务和最终任务,不应当修改地址) + 3. 广播任务每台机器都需要执行,因此不应该重新分配worker(广播任务不应当修改地址) + */ + String taskName = taskOpt.get().getTaskName(); + ExecuteType executeType = ExecuteType.valueOf(instanceInfo.getExecuteType()); + if (!taskName.equals(TaskConstant.ROOT_TASK_NAME) && !taskName.equals(TaskConstant.LAST_TASK_NAME) && executeType != ExecuteType.BROADCAST) { + updateEntity.setAddress(RemoteConstant.EMPTY_ADDRESS); + } + + updateEntity.setStatus(TaskStatus.WAITING_DISPATCH.getValue()); + updateEntity.setLastReportTime(reportTime); + + boolean retryTask = taskPersistenceService.updateTask(instanceId, taskId, updateEntity); + if (retryTask) { + log.info("[TaskTracker-{}-{}] task(taskId={}) process failed, TaskTracker will have a retry.", instanceId, subInstanceId, taskId); + return; + } + } + } + } + + // 更新状态(失败重试写入DB失败的,也就不重试了...谁让你那么倒霉呢...) 
+ result = result == null ? "" : result; + boolean updateResult = taskPersistenceService.updateTaskStatus(instanceId, taskId, newStatus, reportTime, result); + + if (!updateResult) { + log.warn("[TaskTracker-{}-{}] update task status failed, this task(taskId={}) may be processed repeatedly!", instanceId, subInstanceId, taskId); + } + + } catch (InterruptedException ignore) { + // ignore + } catch (Exception e) { + log.warn("[TaskTracker-{}-{}] update task status failed.", instanceId, subInstanceId, e); + } finally { + segmentLock.unlock(lockId); + } + } + + /** + * 提交Task任务(MapReduce的Map,Broadcast的广播),上层保证 batchSize,同时插入过多数据可能导致失败 + * + * @param newTaskList 新增的子任务列表 + */ + public boolean submitTask(List newTaskList) { + if (finished.get()) { + return true; + } + if (CollectionUtils.isEmpty(newTaskList)) { + return true; + } + // 基础处理(多循环一次虽然有些浪费,但分布式执行中,这点耗时绝不是主要占比,忽略不计!) + newTaskList.forEach(task -> { + task.setInstanceId(instanceId); + task.setStatus(TaskStatus.WAITING_DISPATCH.getValue()); + task.setFailedCnt(0); + task.setLastModifiedTime(System.currentTimeMillis()); + task.setCreatedTime(System.currentTimeMillis()); + task.setLastReportTime(-1L); + }); + + log.debug("[TaskTracker-{}] receive new tasks: {}", instanceId, newTaskList); + return taskPersistenceService.batchSave(newTaskList); + } + + /** + * 处理 ProcessorTracker 的心跳信息 + * + * @param heartbeatReq ProcessorTracker(任务的执行管理器)发来的心跳包,包含了其当前状态 + */ + public void receiveProcessorTrackerHeartbeat(ProcessorTrackerStatusReportReq heartbeatReq) { + log.debug("[TaskTracker-{}] receive heartbeat: {}", instanceId, heartbeatReq); + ptStatusHolder.updateStatus(heartbeatReq); + + // 上报空闲,检查是否已经接收到全部该 ProcessorTracker 负责的任务 + if (heartbeatReq.getType() == ProcessorTrackerStatusReportReq.IDLE) { + String idlePtAddress = heartbeatReq.getAddress(); + // 该 ProcessorTracker 已销毁,重置为初始状态 + ptStatusHolder.getProcessorTrackerStatus(idlePtAddress).setDispatched(false); + List unfinishedTask = 
taskPersistenceService.getAllUnFinishedTaskByAddress(instanceId, idlePtAddress); + if (!CollectionUtils.isEmpty(unfinishedTask)) { + log.warn("[TaskTracker-{}] ProcessorTracker({}) is idle now but have unfinished tasks: {}", instanceId, idlePtAddress, unfinishedTask); + unfinishedTask.forEach(task -> updateTaskStatus(task.getSubInstanceId(), task.getTaskId(), TaskStatus.WORKER_PROCESS_FAILED.getValue(), System.currentTimeMillis(), "SYSTEM: unreceived process result")); + } + } + } + + /** + * 生成广播任务 + * + * @param preExecuteSuccess 预执行广播任务运行状态 + * @param subInstanceId 子实例ID + * @param preTaskId 预执行广播任务的taskId + * @param result 预执行广播任务的结果 + */ + public void broadcast(boolean preExecuteSuccess, long subInstanceId, String preTaskId, String result) { + + if (finished.get()) { + return; + } + + log.info("[TaskTracker-{}-{}] finished broadcast's preProcess, preExecuteSuccess:{},preTaskId:{},result:{}", instanceId, subInstanceId, preExecuteSuccess, preTaskId, result); + + // 生成集群子任务 + if (preExecuteSuccess) { + List allWorkerAddress = ptStatusHolder.getAllProcessorTrackers(); + List subTaskList = Lists.newLinkedList(); + for (int i = 0; i < allWorkerAddress.size(); i++) { + TaskDO subTask = new TaskDO(); + subTask.setSubInstanceId(subInstanceId); + subTask.setTaskName(TaskConstant.BROADCAST_TASK_NAME); + subTask.setTaskId(preTaskId + "." + i); + // 广播任务直接写入派发地址 + subTask.setAddress(allWorkerAddress.get(i)); + subTaskList.add(subTask); + } + submitTask(subTaskList); + } else { + log.warn("[TaskTracker-{}-{}] BroadcastTask failed because of preProcess failed, preProcess result={}.", instanceId, subInstanceId, result); + } + } + + /** + * 销毁自身,释放资源 + */ + @Override + public void destroy() { + + finished.set(true); + + Stopwatch sw = Stopwatch.createStarted(); + // 0. 开始关闭线程池,不能使用 shutdownNow(),因为 destroy 方法本身就在 scheduledPool 的线程中执行,强行关闭会打断 destroy 的执行。 + scheduledPool.shutdown(); + + // 1. 
通知 ProcessorTracker 释放资源 + TaskTrackerStopInstanceReq stopRequest = new TaskTrackerStopInstanceReq(); + stopRequest.setInstanceId(instanceId); + ptStatusHolder.getAllProcessorTrackers().forEach(ptIP -> { + String ptPath = AkkaUtils.getAkkaWorkerPath(ptIP, RemoteConstant.PROCESSOR_TRACKER_ACTOR_NAME); + ActorSelection ptActor = workerRuntime.getActorSystem().actorSelection(ptPath); + // 不可靠通知,ProcessorTracker 也可以靠自己的定时任务/问询等方式关闭 + ptActor.tell(stopRequest, null); + }); + + // 2. 删除所有数据库数据 + boolean dbSuccess = taskPersistenceService.deleteAllTasks(instanceId); + if (!dbSuccess) { + log.error("[TaskTracker-{}] delete tasks from database failed.", instanceId); + } else { + log.debug("[TaskTracker-{}] delete all tasks from database successfully.", instanceId); + } + + // 3. 移除顶层引用,送去 GC + HeavyTaskTrackerManager.removeTaskTracker(instanceId); + + log.info("[TaskTracker-{}] TaskTracker has left the world(using {}), bye~", instanceId, sw.stop()); + + // 4. 强制关闭线程池 + if (!scheduledPool.isTerminated()) { + CommonUtils.executeIgnoreException(() -> scheduledPool.shutdownNow()); + } + + } + + @Override + public void stopTask() { + destroy(); + } + + /* *************************** 对内方法区 *************************** */ + + /** + * 派发任务到 ProcessorTracker + * + * @param task 需要被执行的任务 + * @param processorTrackerAddress ProcessorTracker的地址(IP:Port) + */ + protected void dispatchTask(TaskDO task, String processorTrackerAddress) { + + // 1. 持久化,更新数据库(如果更新数据库失败,可能导致重复执行,先不处理) + TaskDO updateEntity = new TaskDO(); + updateEntity.setStatus(TaskStatus.DISPATCH_SUCCESS_WORKER_UNCHECK.getValue()); + // 写入处理该任务的 ProcessorTracker + updateEntity.setAddress(processorTrackerAddress); + boolean success = taskPersistenceService.updateTask(instanceId, task.getTaskId(), updateEntity); + if (!success) { + log.warn("[TaskTracker-{}] dispatch task(taskId={},taskName={}) failed due to update task status failed.", instanceId, task.getTaskId(), task.getTaskName()); + return; + } + + // 2. 
更新 ProcessorTrackerStatus 状态 + ptStatusHolder.getProcessorTrackerStatus(processorTrackerAddress).setDispatched(true); + // 3. 初始化缓存 + taskId2BriefInfo.put(task.getTaskId(), new TaskBriefInfo(task.getTaskId(), TaskStatus.DISPATCH_SUCCESS_WORKER_UNCHECK, -1L)); + + // 4. 任务派发 + TaskTrackerStartTaskReq startTaskReq = new TaskTrackerStartTaskReq(instanceInfo, task, workerRuntime.getWorkerAddress()); + String ptActorPath = AkkaUtils.getAkkaWorkerPath(processorTrackerAddress, RemoteConstant.PROCESSOR_TRACKER_ACTOR_NAME); + ActorSelection ptActor = workerRuntime.getActorSystem().actorSelection(ptActorPath); + ptActor.tell(startTaskReq, null); + + log.debug("[TaskTracker-{}] dispatch task(taskId={},taskName={}) successfully.", instanceId, task.getTaskId(), task.getTaskName()); + } + + /** + * 获取任务实例产生的各个Task状态,用于分析任务实例执行情况 + * + * @param subInstanceId 子任务实例ID + * @return InstanceStatisticsHolder + */ + protected InstanceStatisticsHolder getInstanceStatisticsHolder(long subInstanceId) { + + Map status2Num = taskPersistenceService.getTaskStatusStatistics(instanceId, subInstanceId); + InstanceStatisticsHolder holder = new InstanceStatisticsHolder(); + + holder.waitingDispatchNum = status2Num.getOrDefault(TaskStatus.WAITING_DISPATCH, 0L); + holder.workerUnreceivedNum = status2Num.getOrDefault(TaskStatus.DISPATCH_SUCCESS_WORKER_UNCHECK, 0L); + holder.receivedNum = status2Num.getOrDefault(TaskStatus.WORKER_RECEIVED, 0L); + holder.runningNum = status2Num.getOrDefault(TaskStatus.WORKER_PROCESSING, 0L); + holder.failedNum = status2Num.getOrDefault(TaskStatus.WORKER_PROCESS_FAILED, 0L); + holder.succeedNum = status2Num.getOrDefault(TaskStatus.WORKER_PROCESS_SUCCESS, 0L); + return holder; + } + + + /** + * 定时扫描数据库中的task(出于内存占用量考虑,每次最多获取100个),并将需要执行的任务派发出去 + */ + protected class Dispatcher implements Runnable { + + // 数据库查询限制,每次最多查询几个任务 + private static final int DB_QUERY_LIMIT = 100; + + @Override + public void run() { + + if (finished.get()) { + return; + } + + Stopwatch stopwatch = 
Stopwatch.createStarted(); + + // 1. 获取可以派发任务的 ProcessorTracker + List availablePtIps = ptStatusHolder.getAvailableProcessorTrackers(); + + // 2. 没有可用 ProcessorTracker,本次不派发 + if (availablePtIps.isEmpty()) { + log.debug("[TaskTracker-{}] no available ProcessorTracker now.", instanceId); + return; + } + + // 3. 避免大查询,分批派发任务 + long currentDispatchNum = 0; + long maxDispatchNum = availablePtIps.size() * instanceInfo.getThreadConcurrency() * 2L; + AtomicInteger index = new AtomicInteger(0); + + // 4. 循环查询数据库,获取需要派发的任务 + while (maxDispatchNum > currentDispatchNum) { + + int dbQueryLimit = Math.min(DB_QUERY_LIMIT, (int) maxDispatchNum); + List needDispatchTasks = taskPersistenceService.getTaskByStatus(instanceId, TaskStatus.WAITING_DISPATCH, dbQueryLimit); + currentDispatchNum += needDispatchTasks.size(); + + needDispatchTasks.forEach(task -> { + // 获取 ProcessorTracker 地址,如果 Task 中自带了 Address,则使用该 Address + String ptAddress = task.getAddress(); + if (StringUtils.isEmpty(ptAddress) || RemoteConstant.EMPTY_ADDRESS.equals(ptAddress)) { + ptAddress = availablePtIps.get(index.getAndIncrement() % availablePtIps.size()); + } + dispatchTask(task, ptAddress); + }); + + // 数量不足 或 查询失败,则终止循环 + if (needDispatchTasks.size() < dbQueryLimit) { + break; + } + } + + log.debug("[TaskTracker-{}] dispatched {} tasks,using time {}.", instanceId, currentDispatchNum, stopwatch.stop()); + } + } + + /** + * 执行器动态上线(for 秒级任务和 MR 任务) + * 原则:server 查询得到的 执行器状态不会干预 worker 自己维护的状态,即只做新增,不做任何修改 + */ + protected class WorkerDetector implements Runnable { + @Override + public void run() { + + boolean needMoreWorker = ptStatusHolder.checkNeedMoreWorker(); + log.info("[TaskTracker-{}] checkNeedMoreWorker: {}", instanceId, needMoreWorker); + if (!needMoreWorker) { + return; + } + + String serverPath = AkkaUtils.getServerActorPath(workerRuntime.getServerDiscoveryService().getCurrentServerAddress()); + if (StringUtils.isEmpty(serverPath)) { + log.warn("[TaskTracker-{}] no server available, won't start worker 
detective!", instanceId); + return; + } + WorkerQueryExecutorClusterReq req = new WorkerQueryExecutorClusterReq(workerRuntime.getAppId(), instanceInfo.getJobId()); + AskResponse response = AkkaUtils.easyAsk(workerRuntime.getActorSystem().actorSelection(serverPath), req); + if (!response.isSuccess()) { + log.warn("[TaskTracker-{}] detective failed due to ask failed, message is {}", instanceId, response.getMessage()); + return; + } + try { + List workerList = JsonUtils.parseObject(response.getData(), new TypeReference>() {}); + ptStatusHolder.register(workerList); + } catch (Exception e) { + log.warn("[TaskTracker-{}] detective failed!", instanceId, e); + } + } + } + + @Data + @AllArgsConstructor + protected static class TaskBriefInfo { + + private String id; + + private TaskStatus status; + + private Long lastReportTime; + } + + /** + * 存储任务实例产生的各个Task状态,用于分析任务实例执行情况 + */ + @Data + protected static class InstanceStatisticsHolder { + // 等待派发状态(仅存在 TaskTracker 数据库中) + protected long waitingDispatchNum; + // 已派发,但 ProcessorTracker 未确认,可能由于网络错误请求未送达,也有可能 ProcessorTracker 线程池满,拒绝执行 + protected long workerUnreceivedNum; + // ProcessorTracker确认接收,存在与线程池队列中,排队执行 + protected long receivedNum; + // ProcessorTracker正在执行 + protected long runningNum; + protected long failedNum; + protected long succeedNum; + + public long getTotalTaskNum() { + return waitingDispatchNum + workerUnreceivedNum + receivedNum + runningNum + failedNum + succeedNum; + } + } + + /** + * 初始化 TaskTracker + * + * @param req 服务器调度任务实例运行请求 + */ + protected abstract void initTaskTracker(ServerScheduleJobReq req); +} diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/light/LightTaskTracker.java b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/light/LightTaskTracker.java new file mode 100644 index 00000000..d46fc5dd --- /dev/null +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/core/tracker/task/light/LightTaskTracker.java @@ -0,0 +1,408 @@ +package 
tech.powerjob.worker.core.tracker.task.light; + +import akka.actor.ActorSelection; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.RandomUtils; +import org.apache.commons.lang3.StringUtils; +import tech.powerjob.common.PowerJobDKey; +import tech.powerjob.common.SystemInstanceResult; +import tech.powerjob.common.enums.InstanceStatus; +import tech.powerjob.common.model.InstanceDetail; +import tech.powerjob.common.request.ServerScheduleJobReq; +import tech.powerjob.common.request.TaskTrackerReportInstanceStatusReq; +import tech.powerjob.worker.common.WorkerRuntime; +import tech.powerjob.worker.common.constants.TaskConstant; +import tech.powerjob.worker.common.constants.TaskStatus; +import tech.powerjob.worker.common.utils.AkkaUtils; +import tech.powerjob.worker.core.processor.*; +import tech.powerjob.worker.core.tracker.manager.LightTaskTrackerManager; +import tech.powerjob.worker.core.tracker.task.TaskTracker; +import tech.powerjob.worker.log.OmsLoggerFactory; + +import java.util.concurrent.Future; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; + +/** + * @author Echo009 + * @since 2022/9/19 + */ +@Slf4j +public class LightTaskTracker extends TaskTracker { + /** + * statusReportScheduledFuture + */ + private final ScheduledFuture statusReportScheduledFuture; + /** + * timeoutCheckScheduledFuture + */ + private final ScheduledFuture timeoutCheckScheduledFuture; + /** + * processFuture + */ + private final Future processFuture; + /** + * 执行线程 + */ + private final AtomicReference executeThread; + /** + * 处理器信息 + */ + private final ProcessorInfo processorInfo; + /** + * 上下文 + */ + private final TaskContext taskContext; + /** + * 任务状态 + */ + private TaskStatus status; + /** + * 任务开始执行的时间 + */ + private Long taskStartTime; + /** + * 任务执行结束的时间 或者 任务被 kill 掉的时间 + */ + private Long taskEndTime; + /** + * 任务处理结果 + */ 
+ private ProcessResult result; + + private final AtomicBoolean timeoutFlag = new AtomicBoolean(false); + + protected final AtomicBoolean stopFlag = new AtomicBoolean(false); + + protected final AtomicBoolean destroyFlag = new AtomicBoolean(false); + + + public LightTaskTracker(ServerScheduleJobReq req, WorkerRuntime workerRuntime) { + super(req, workerRuntime); + try { + taskContext = constructTaskContext(req, workerRuntime); + // 等待处理 + status = TaskStatus.WORKER_RECEIVED; + // 加载 Processor + processorInfo = ProcessorLoader.loadProcessor(workerRuntime, req.getProcessorType(), req.getProcessorInfo()); + executeThread = new AtomicReference<>(); + long delay = Integer.parseInt(System.getProperty(PowerJobDKey.WORKER_STATUS_CHECK_PERIOD, "15")) * 1000L; + // 初始延迟加入随机值,避免在高并发场景下所有请求集中在一个时间段 + long initDelay = RandomUtils.nextInt(5000, 10000); + // 上报任务状态 + statusReportScheduledFuture = workerRuntime.getExecutorManager().getLightweightTaskStatusCheckExecutor().scheduleWithFixedDelay(this::checkAndReportStatus, initDelay, delay, TimeUnit.MILLISECONDS); + // 超时控制 + if (instanceInfo.getInstanceTimeoutMS() != Integer.MAX_VALUE) { + if (instanceInfo.getInstanceTimeoutMS() < 1000L) { + timeoutCheckScheduledFuture = workerRuntime.getExecutorManager().getLightweightTaskStatusCheckExecutor().scheduleAtFixedRate(this::timeoutCheck, instanceInfo.getInstanceTimeoutMS(), instanceInfo.getInstanceTimeoutMS() / 10, TimeUnit.MILLISECONDS); + } else { + // 执行时间超过 1 s 的任务,超时检测最小颗粒度为 1 s + timeoutCheckScheduledFuture = workerRuntime.getExecutorManager().getLightweightTaskStatusCheckExecutor().scheduleAtFixedRate(this::timeoutCheck, instanceInfo.getInstanceTimeoutMS(), 1000L, TimeUnit.MILLISECONDS); + } + } else { + timeoutCheckScheduledFuture = null; + } + // 提交任务到线程池 + processFuture = workerRuntime.getExecutorManager().getLightweightTaskExecutorService().submit(this::processTask); + } catch (Exception e) { + log.warn("[TaskTracker-{}] fail to create TaskTracker for req:{} ", instanceId, 
req); + destroy(); + throw e; + } + + } + + /** + * Static factory that builds a LightTaskTracker from the request and worker runtime; if the constructor throws, the error is reported to the server and null is returned. + * + * @param req job scheduling request sent by the server + * @return the new LightTaskTracker, or null when construction failed + */ + public static LightTaskTracker create(ServerScheduleJobReq req, WorkerRuntime workerRuntime) { + try { + return new LightTaskTracker(req, workerRuntime); + } catch (Exception e) { + reportCreateErrorToServer(req, workerRuntime, e); + } + return null; + } + + + @Override + public void destroy() { + if (!destroyFlag.compareAndSet(false, true)) { + log.warn("[TaskTracker-{}] This TaskTracker has been destroyed!", instanceId); + return; + } + if (statusReportScheduledFuture != null) { + statusReportScheduledFuture.cancel(true); + } + if (timeoutCheckScheduledFuture != null) { + timeoutCheckScheduledFuture.cancel(true); + } + if (processFuture != null) { + processFuture.cancel(true); + } + LightTaskTrackerManager.removeTaskTracker(instanceId); + // the last column is the total time (how long resources were occupied: current time minus creation time) + log.warn("[TaskTracker-{}] remove TaskTracker,task status {},start time:{},end time:{},real cost:{},total time:{}", instanceId, status, taskStartTime, taskEndTime, taskEndTime != null ? 
taskEndTime - taskStartTime : "unknown", System.currentTimeMillis() - createTime); + } + + @Override + public void stopTask() { + + // already finished, ignore the stop request + if (finished.get()) { + log.warn("[TaskTracker-{}] fail to stop task,task is finished!result:{}", instanceId, result); + return; + } + if (!stopFlag.compareAndSet(false, true)) { + log.warn("[TaskTracker-{}] task has been mark as stopped,ignore this request!", instanceId); + return; + } + // the task has not started executing yet + if (status == TaskStatus.WORKER_RECEIVED) { + log.warn("[TaskTracker-{}] task is not started,destroy this taskTracker directly!", instanceId); + destroy(); + return; + } + // the task is executing + if (processFuture != null) { + // try to interrupt it + log.info("[TaskTracker-{}] try to interrupt task!", instanceId); + processFuture.cancel(true); + } + } + + @Override + public InstanceDetail fetchRunningStatus() { + InstanceDetail detail = new InstanceDetail(); + // fill in the basic information + detail.setActualTriggerTime(createTime); + detail.setStatus(InstanceStatus.RUNNING.getV()); + detail.setTaskTrackerAddress(workerRuntime.getWorkerAddress()); + // fill in the detailed information + InstanceDetail.TaskDetail taskDetail = new InstanceDetail.TaskDetail(); + taskDetail.setSucceedTaskNum(0); + taskDetail.setFailedTaskNum(0); + taskDetail.setTotalTaskNum(1); + detail.setTaskDetail(taskDetail); + return detail; + } + + private ProcessResult processTask() { + executeThread.set(Thread.currentThread()); + // record the time the task starts executing + taskStartTime = System.currentTimeMillis(); + status = TaskStatus.WORKER_PROCESSING; + // when execution starts, submit a task that checks for timeout (NOTE(review): no such submission is visible in this method; confirm where the check is scheduled) + ProcessResult res = null; + do { + Thread.currentThread().setContextClassLoader(processorInfo.getClassLoader()); + if (res != null && !res.isSuccess()) { + // retry + taskContext.setCurrentRetryTimes(taskContext.getCurrentRetryTimes() + 1); + log.warn("[TaskTracker-{}] process failed, TaskTracker will have a retry,current retryTimes : {}", instanceId, taskContext.getCurrentRetryTimes()); + } + try { + res = processorInfo.getBasicProcessor().process(taskContext); + } catch 
(InterruptedException e) { + log.warn("[TaskTracker-{}] task has been interrupted !", instanceId, e); + Thread.currentThread().interrupt(); + if (timeoutFlag.get()) { + res = new ProcessResult(false, SystemInstanceResult.INSTANCE_EXECUTE_TIMEOUT_INTERRUPTED); + } else if (stopFlag.get()) { + res = new ProcessResult(false, SystemInstanceResult.USER_STOP_INSTANCE_INTERRUPTED); + } else { + res = new ProcessResult(false, e.toString()); + } + } catch (Exception e) { + log.warn("[TaskTracker-{}] process failed !", instanceId, e); + res = new ProcessResult(false, e.toString()); + } + if (res == null) { + log.warn("[TaskTracker-{}] processor return null !", instanceId); + res = new ProcessResult(false, "Processor return null"); + } + } while (!res.isSuccess() && taskContext.getCurrentRetryTimes() < taskContext.getMaxRetryTimes() && !timeoutFlag.get() && !stopFlag.get()); + executeThread.set(null); + taskEndTime = System.currentTimeMillis(); + finished.set(true); + result = res; + status = result.isSuccess() ? TaskStatus.WORKER_PROCESS_SUCCESS : TaskStatus.WORKER_PROCESS_FAILED; + // cancel the timeout check task + if (timeoutCheckScheduledFuture != null) { + timeoutCheckScheduledFuture.cancel(true); + } + log.info("[TaskTracker-{}] task complete ! 
create time:{},queue time:{},use time:{},result:{}", instanceId, createTime, taskStartTime - createTime, System.currentTimeMillis() - taskStartTime, result); + // report once immediately after execution completes + checkAndReportStatus(); + return result; + } + + + private synchronized void checkAndReportStatus() { + if (destroyFlag.get()) { + // already destroyed, no need to report status + log.info("[TaskTracker-{}] has been destroyed,final status is {},needn't to report status!", instanceId, status); + return; + } + String serverPath = AkkaUtils.getServerActorPath(workerRuntime.getServerDiscoveryService().getCurrentServerAddress()); + ActorSelection serverActor = workerRuntime.getActorSystem().actorSelection(serverPath); + TaskTrackerReportInstanceStatusReq reportInstanceStatusReq = new TaskTrackerReportInstanceStatusReq(); + reportInstanceStatusReq.setAppId(workerRuntime.getAppId()); + reportInstanceStatusReq.setJobId(instanceInfo.getJobId()); + reportInstanceStatusReq.setInstanceId(instanceId); + reportInstanceStatusReq.setWfInstanceId(instanceInfo.getWfInstanceId()); + reportInstanceStatusReq.setTotalTaskNum(1); + reportInstanceStatusReq.setReportTime(System.currentTimeMillis()); + reportInstanceStatusReq.setStartTime(createTime); + reportInstanceStatusReq.setSourceAddress(workerRuntime.getWorkerAddress()); + reportInstanceStatusReq.setSucceedTaskNum(0); + reportInstanceStatusReq.setFailedTaskNum(0); + + if (stopFlag.get()) { + if (finished.get()) { + // the task was interrupted successfully + destroy(); + return; + } + final Thread workerThread = executeThread.get(); + if (!finished.get() && workerThread != null) { + // the interrupt did not stop the task, force stop it + try { + if (tryForceStopThread(workerThread)) { + finished.set(true); + taskEndTime = System.currentTimeMillis(); + result = new ProcessResult(false, SystemInstanceResult.USER_STOP_INSTANCE_FORCE_STOP); + log.warn("[TaskTracker-{}] task need stop, force stop thread {} success!", instanceId, workerThread.getName()); + // a terminated task does not need to report its status + destroy(); + return; + } + } catch (Exception e) { + log.warn("[TaskTracker-{}] task 
need stop,fail to stop thread {}", instanceId, workerThread.getName(), e); + } + } + } + if (finished.get()) { + if (result.isSuccess()) { + reportInstanceStatusReq.setSucceedTaskNum(1); + reportInstanceStatusReq.setInstanceStatus(InstanceStatus.SUCCEED.getV()); + } else { + reportInstanceStatusReq.setFailedTaskNum(1); + reportInstanceStatusReq.setInstanceStatus(InstanceStatus.FAILED.getV()); + } + // handle the workflow context + if (taskContext.getWorkflowContext().getWfInstanceId() != null) { + reportInstanceStatusReq.setAppendedWfContext(taskContext.getWorkflowContext().getAppendedContextData()); + } + reportInstanceStatusReq.setResult(suit(result.getMsg())); + reportInstanceStatusReq.setEndTime(taskEndTime); + // small trick: when reporting the final status, reset the report time and add a small offset so that, when the running status and the final status are reported concurrently, the final status carries the later report time + reportInstanceStatusReq.setReportTime(System.currentTimeMillis() + 1); + reportFinalStatusThenDestroy(serverActor, reportInstanceStatusReq); + return; + } + // unfinished task: only the running status needs to be reported + reportInstanceStatusReq.setInstanceStatus(InstanceStatus.RUNNING.getV()); + log.info("[TaskTracker-{}] report status({}) success,real status is {}", instanceId, reportInstanceStatusReq, status); + serverActor.tell(reportInstanceStatusReq, null); + } + + private void timeoutCheck() { + if (taskStartTime == null || System.currentTimeMillis() - taskStartTime < instanceInfo.getInstanceTimeoutMS()) { + return; + } + if (finished.get() && result != null) { + timeoutCheckScheduledFuture.cancel(true); + return; + } + // first time the timeout is detected + if (timeoutFlag.compareAndSet(false, true)) { + // timed out: only try to interrupt the task (NOTE(review): processFuture is not null-checked here, unlike destroy() and stopTask(); confirm it is always assigned before the first timeout check) + log.warn("[TaskTracker-{}] task timeout,taskStarTime:{},currentTime:{},runningTimeLimit:{}, try to interrupt it.", instanceId, taskStartTime, System.currentTimeMillis(), instanceInfo.getInstanceTimeoutMS()); + processFuture.cancel(true); + return; + } + if (finished.get()) { + // the task has been interrupted successfully + log.warn("[TaskTracker-{}] task timeout,taskStarTime:{},endTime:{}, interrupt success.", instanceId, taskStartTime, taskEndTime); + return; + } + Thread 
workerThread = executeThread.get(); + if (workerThread == null) { + return; + } + // the interrupt did not stop the task, force terminate it + try { + if (tryForceStopThread(workerThread)) { + finished.set(true); + taskEndTime = System.currentTimeMillis(); + result = new ProcessResult(false, SystemInstanceResult.INSTANCE_EXECUTE_TIMEOUT_FORCE_STOP); + log.warn("[TaskTracker-{}] task timeout, force stop thread {} success!", instanceId, workerThread.getName()); + } + } catch (Exception e) { + log.warn("[TaskTracker-{}] task timeout,fail to stop thread {}", instanceId, workerThread.getName(), e); + } + } + + private TaskContext constructTaskContext(ServerScheduleJobReq req, WorkerRuntime workerRuntime) { + final TaskContext context = new TaskContext(); + context.setTaskId(req.getJobId() + "#" + req.getInstanceId()); + context.setJobId(req.getJobId()); + context.setJobParams(req.getJobParams()); + context.setInstanceId(req.getInstanceId()); + context.setInstanceParams(req.getInstanceParams()); + context.setWorkflowContext(new WorkflowContext(req.getWfInstanceId(), req.getInstanceParams())); + context.setOmsLogger(OmsLoggerFactory.build(req.getInstanceId(), req.getLogConfig(), workerRuntime)); + context.setTaskName(TaskConstant.ROOT_TASK_NAME); + context.setMaxRetryTimes(req.getTaskRetryNum()); + context.setCurrentRetryTimes(0); + context.setUserContext(workerRuntime.getWorkerConfig().getUserContext()); + // lightweight tasks never involve task sharding, so no sub-task information needs to be handled + return context; + } + + private String suit(String result) { + if (StringUtils.isEmpty(result)) { + return ""; + } + final int maxLength = workerRuntime.getWorkerConfig().getMaxResultLength(); + if (result.length() <= maxLength) { + return result; + } + log.warn("[TaskTracker-{}] task's result is too large({}>{}), a part will be discarded.", + instanceId, result.length(), maxLength); + return result.substring(0, maxLength).concat("..."); + } + + /** + * Force stops the given thread with Thread.stop(), but only when the system property named by PowerJobDKey.WORKER_ALLOWED_FORCE_STOP_THREAD equals "true" (ignoring case). + * + * @param thread the thread to stop + * @return true when Thread.stop() returned normally, false when force stop is not enabled or the call threw + */ + private boolean 
tryForceStopThread(Thread thread) { + + String threadName = thread.getName(); + + String allowStopThread = System.getProperty(PowerJobDKey.WORKER_ALLOWED_FORCE_STOP_THREAD); + if (!StringUtils.equalsIgnoreCase(allowStopThread, Boolean.TRUE.toString())) { + log.warn("[TaskTracker-{}] PowerJob not allowed to force stop a thread by config", instanceId); + return false; + } + + log.warn("[TaskTracker-{}] fail to interrupt the thread[{}], try to force stop.", instanceId, threadName); + try { + thread.stop(); + return true; + } catch (Throwable t) { + log.warn("[TaskTracker-{}] stop thread[{}] failed, msg: {}", instanceId, threadName, t.getMessage()); + } + return false; + } + +} diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/ConnectionFactory.java b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/ConnectionFactory.java index 7e6e508f..60013e7e 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/ConnectionFactory.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/ConnectionFactory.java @@ -1,6 +1,7 @@ package tech.powerjob.worker.persistence; import tech.powerjob.common.utils.CommonUtils; +import tech.powerjob.common.utils.JavaUtils; import tech.powerjob.worker.common.constants.StoreStrategy; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; @@ -33,6 +34,10 @@ public class ConnectionFactory { } public synchronized void initDatasource(StoreStrategy strategy) { + + // H2 has many compatibility issues, so log its version up front to ease troubleshooting + log.info("[PowerDatasource] H2 database version: {}", JavaUtils.determinePackageVersion(Driver.class)); + // 兼容单元测试,否则没办法单独测试 DAO 层了 strategy = strategy == null ? 
StoreStrategy.DISK : strategy; diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/SimpleTaskQuery.java b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/SimpleTaskQuery.java index b6b96de9..0e9aec88 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/SimpleTaskQuery.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/SimpleTaskQuery.java @@ -1,7 +1,7 @@ package tech.powerjob.worker.persistence; import lombok.Data; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * 简单查询直接类,只支持 select * from task_info where xxx = xxx and xxx = xxx 的查询 diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskDAOImpl.java b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskDAOImpl.java index f1793a2b..96c4ab89 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskDAOImpl.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskDAOImpl.java @@ -28,7 +28,7 @@ public class TaskDAOImpl implements TaskDAO { String delTableSQL = "drop table if exists task_info"; // 感谢 Gitee 用户 @Linfly 反馈的 BUG // bigint(20) 与 Java Long 取值范围完全一致 - String createTableSQL = "create table task_info (task_id varchar(255), instance_id bigint(20), sub_instance_id bigint(20), task_name varchar(255), task_content blob, address varchar(255), status int(5), result text, failed_cnt int(11), created_time bigint(20), last_modified_time bigint(20), last_report_time bigint(20), unique KEY pkey (instance_id, task_id))"; + String createTableSQL = "create table task_info (task_id varchar(255), instance_id bigint, sub_instance_id bigint, task_name varchar(255), task_content blob, address varchar(255), status int, result text, failed_cnt int, created_time bigint, last_modified_time bigint, last_report_time bigint, constraint pkey unique (instance_id, task_id))"; try (Connection conn = 
connectionFactory.getConnection(); Statement stat = conn.createStatement()) { stat.execute(delTableSQL); diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskDO.java b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskDO.java index 47663953..7d815148 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskDO.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskDO.java @@ -2,7 +2,7 @@ package tech.powerjob.worker.persistence; import lombok.Getter; import lombok.Setter; -import org.springframework.util.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * TaskDO(为了简化 DAO 层,一张表实现两种功能) diff --git a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskPersistenceService.java b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskPersistenceService.java index 3d35f16b..97c5794d 100644 --- a/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskPersistenceService.java +++ b/powerjob-worker/src/main/java/tech/powerjob/worker/persistence/TaskPersistenceService.java @@ -29,8 +29,11 @@ public class TaskPersistenceService { private final StoreStrategy strategy; - // 默认重试参数 + /** + * 默认重试次数 + */ private static final int RETRY_TIMES = 3; + private static final long RETRY_INTERVAL_MS = 100; private TaskDAO taskDAO; diff --git a/powerjob-worker/src/test/java/tech/powerjob/worker/persistence/TaskDAOTest.java b/powerjob-worker/src/test/java/tech/powerjob/worker/persistence/TaskDAOTest.java new file mode 100644 index 00000000..a0a4cd1f --- /dev/null +++ b/powerjob-worker/src/test/java/tech/powerjob/worker/persistence/TaskDAOTest.java @@ -0,0 +1,115 @@ +package tech.powerjob.worker.persistence; + +import com.google.common.collect.Lists; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +import org.h2.jdbc.JdbcSQLIntegrityConstraintViolationException; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; 
+import tech.powerjob.worker.common.constants.StoreStrategy; +import tech.powerjob.worker.common.constants.TaskStatus; +import tech.powerjob.worker.core.processor.TaskResult; + +import java.nio.charset.StandardCharsets; +import java.sql.SQLIntegrityConstraintViolationException; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for the task persistence interface (TaskDAO): unique-key enforcement and the basic save/query/delete/update round trip. + * + * @author tjq + * @since 2022/10/23 + */ +@Slf4j +class TaskDAOTest { + + private static TaskDAO taskDAO; + + @BeforeAll + static void initDAO() throws Exception { + + // 1. create the DAO on top of a DISK-strategy datasource + ConnectionFactory connectionFactory = new ConnectionFactory(); + connectionFactory.initDatasource(StoreStrategy.DISK); + taskDAO = new TaskDAOImpl(connectionFactory); + + // 2. initialize the table + taskDAO.initTable(); + } + + @Test + @SneakyThrows + void testUniqueKey() { + + TaskDO taskDO = buildTaskDO("2", 200000000000000L, TaskStatus.WORKER_PROCESS_FAILED); + boolean firstSaveRet = taskDAO.save(taskDO); + assertTrue(firstSaveRet); + log.info("[testUniqueKey] first save result: {}", firstSaveRet); + assertThrows(SQLIntegrityConstraintViolationException.class, () -> { + taskDAO.save(taskDO); + }); + } + + @Test + @SneakyThrows + void testCRUD() { + TaskDO oneTask = buildTaskDO("1", 1L, TaskStatus.WAITING_DISPATCH); + TaskDO twoTask = buildTaskDO("2", 1L, TaskStatus.WAITING_DISPATCH); + TaskDO threeTask = buildTaskDO("99", 1L, TaskStatus.WAITING_DISPATCH); + + boolean batchSave = taskDAO.batchSave(Lists.newArrayList(oneTask, twoTask, threeTask)); + log.info("[testCRUD] batchSave result: {}", batchSave); + assertTrue(batchSave); + + SimpleTaskQuery query = new SimpleTaskQuery(); + query.setInstanceId(1L); + List<TaskDO> simpleQueryRet = taskDAO.simpleQuery(query); + log.info("[testCRUD] simple query by instanceId's result: {}", simpleQueryRet); + assertEquals(3, simpleQueryRet.size()); + + SimpleTaskQuery deleteQuery = new SimpleTaskQuery(); + deleteQuery.setTaskId("99"); + deleteQuery.setInstanceId(1L); + boolean 
simpleDelete = taskDAO.simpleDelete(deleteQuery); + log.info("[testCRUD] simpleDelete result: {}", simpleDelete); + assertTrue(simpleDelete); + + query.setQueryContent("status, result"); + List<Map<String, Object>> simpleQueryPlusRet = taskDAO.simpleQueryPlus(query); + log.info("[testCRUD] simple query plus by instanceId's result: {}", simpleQueryPlusRet); + assertEquals(2, simpleQueryPlusRet.size()); + assertNotNull(simpleQueryPlusRet.get(0).get("status")); + assertNull(simpleQueryPlusRet.get(0).get("instanceId")); + + boolean updateToSuccessRet = taskDAO.updateTaskStatus(1L, "1", TaskStatus.WORKER_PROCESS_SUCCESS.getValue(), System.currentTimeMillis(), "UPDATE_TO_SUCCESS"); + boolean updateToFailedRet = taskDAO.updateTaskStatus(1L, "2", TaskStatus.WORKER_PROCESS_FAILED.getValue(), System.currentTimeMillis(), "UPDATE_TO_FAILED"); + assertTrue(updateToSuccessRet); + assertTrue(updateToFailedRet); + + List<TaskResult> allTaskResult = taskDAO.getAllTaskResult(1L, 1L); + log.info("[testCRUD] allTaskResult: {}", allTaskResult); + assertEquals(2, allTaskResult.size()); + } + + private static TaskDO buildTaskDO(String taskId, Long instanceId, TaskStatus taskStatus) { + TaskDO taskDO = new TaskDO(); + taskDO.setTaskId(taskId); + taskDO.setInstanceId(instanceId); + taskDO.setSubInstanceId(instanceId); + taskDO.setTaskName("TEST_TASK"); + taskDO.setTaskContent("TEST_CONTENT".getBytes(StandardCharsets.UTF_8)); + taskDO.setAddress("127.0.0.1:10086"); + taskDO.setStatus(taskStatus.getValue()); + taskDO.setResult("SUCCESS"); + taskDO.setFailedCnt(0); + taskDO.setLastModifiedTime(System.currentTimeMillis()); + taskDO.setLastReportTime(System.currentTimeMillis()); + taskDO.setCreatedTime(System.currentTimeMillis()); + return taskDO; + } + + +} \ No newline at end of file diff --git a/powerjob-worker/src/test/java/tech/powerjob/worker/test/PersistenceServiceTest.java b/powerjob-worker/src/test/java/tech/powerjob/worker/test/PersistenceServiceTest.java index f922a0b5..6c90b5ed 100644 --- 
a/powerjob-worker/src/test/java/tech/powerjob/worker/test/PersistenceServiceTest.java +++ b/powerjob-worker/src/test/java/tech/powerjob/worker/test/PersistenceServiceTest.java @@ -11,7 +11,7 @@ import org.junit.jupiter.api.*; import java.util.List; import java.util.concurrent.ThreadLocalRandom; -import static tech.powerjob.worker.core.tracker.task.CommonTaskTracker.ROOT_TASK_ID; +import static tech.powerjob.worker.core.tracker.task.heavy.CommonTaskTracker.ROOT_TASK_ID; /** * H2 数据库持久化测试 diff --git a/powerjob-worker/src/test/java/tech/powerjob/worker/test/function/IdleTest.java b/powerjob-worker/src/test/java/tech/powerjob/worker/test/function/IdleTest.java index 2b98e634..c9f19e35 100644 --- a/powerjob-worker/src/test/java/tech/powerjob/worker/test/function/IdleTest.java +++ b/powerjob-worker/src/test/java/tech/powerjob/worker/test/function/IdleTest.java @@ -7,7 +7,7 @@ import tech.powerjob.common.enums.TimeExpressionType; import tech.powerjob.common.request.ServerScheduleJobReq; import tech.powerjob.worker.common.WorkerRuntime; import tech.powerjob.worker.core.tracker.processor.ProcessorTracker; -import tech.powerjob.worker.core.tracker.task.TaskTracker; +import tech.powerjob.worker.core.tracker.task.heavy.HeavyTaskTracker; import tech.powerjob.worker.pojo.request.ProcessorTrackerStatusReportReq; import tech.powerjob.worker.pojo.request.TaskTrackerStartTaskReq; import org.junit.jupiter.api.Test; @@ -33,7 +33,7 @@ public class IdleTest extends CommonTest { ProcessorTrackerStatusReportReq req = ProcessorTrackerStatusReportReq.buildIdleReport(10086L); ServerScheduleJobReq serverScheduleJobReq = TestUtils.genServerScheduleJobReq(ExecuteType.STANDALONE, TimeExpressionType.API); - TaskTracker taskTracker = TaskTracker.create(serverScheduleJobReq, new WorkerRuntime()); + HeavyTaskTracker taskTracker = HeavyTaskTracker.create(serverScheduleJobReq, new WorkerRuntime()); if (taskTracker != null) { taskTracker.receiveProcessorTrackerHeartbeat(req); }