opt omsOnlineLog

This commit is contained in:
tjq 2020-05-07 13:02:32 +08:00
parent 7aa6d2cc6f
commit b35dc15194

View File

@ -11,9 +11,10 @@ import com.github.kfcfans.oms.server.persistence.mongodb.InstanceLogMetadata;
import com.github.kfcfans.oms.server.service.instance.InstanceManager; import com.github.kfcfans.oms.server.service.instance.InstanceManager;
import com.google.common.base.Stopwatch; import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Queues; import com.google.common.collect.Queues;
import com.google.common.collect.Sets;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.lang3.time.FastDateFormat; import org.apache.commons.lang3.time.FastDateFormat;
import org.springframework.beans.BeanUtils; import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
@ -24,15 +25,12 @@ import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.CollectionUtils; import org.springframework.util.CollectionUtils;
import org.springframework.util.FileCopyUtils;
import java.io.*; import java.io.*;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Map;
import java.util.concurrent.Executor; import java.util.concurrent.*;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
@ -51,11 +49,8 @@ public class InstanceLogService {
private LocalInstanceLogRepository localInstanceLogRepository; private LocalInstanceLogRepository localInstanceLogRepository;
// 本地维护了在线日志的任务实例ID // 本地维护了在线日志的任务实例ID
private final Set<Long> instanceIds = Sets.newConcurrentHashSet(); private final Map<Long, Long> instanceId2LastReportTime = Maps.newConcurrentMap();
// 可重入锁也太坑了吧需要考虑同一个线程重复下载的问题 -> 因为下载交给了额外的线程去做... private final ExecutorService workerPool;
private final int lockNum;
private final Lock[] locks;
private final Executor workerPool;
// 格式化时间戳 // 格式化时间戳
private static final FastDateFormat dateFormat = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss.SSS"); private static final FastDateFormat dateFormat = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss.SSS");
@ -65,17 +60,10 @@ public class InstanceLogService {
private static final int MAX_LINE_COUNT = 500; private static final int MAX_LINE_COUNT = 500;
// 过期时间 // 过期时间
private static final long EXPIRE_INTERVAL_MS = 60000; private static final long EXPIRE_INTERVAL_MS = 60000;
// 小文件阈值(2M)
private static final int SMALL_FILE_MAX_SIZE = 2 * 1024 * 1024;
public InstanceLogService() { public InstanceLogService() {
lockNum = Runtime.getRuntime().availableProcessors(); int coreSize = Runtime.getRuntime().availableProcessors();
locks = new ReentrantLock[lockNum]; workerPool = new ThreadPoolExecutor(coreSize, coreSize, 1, TimeUnit.MINUTES, Queues.newLinkedBlockingQueue());
for (int i = 0; i < lockNum; i++) {
locks[i] = new ReentrantLock();
}
workerPool = new ThreadPoolExecutor(lockNum, lockNum, 1, TimeUnit.MINUTES, Queues.newLinkedBlockingQueue());
} }
/** /**
@ -86,7 +74,7 @@ public class InstanceLogService {
public void submitLogs(String workerAddress, List<InstanceLogContent> logs) { public void submitLogs(String workerAddress, List<InstanceLogContent> logs) {
List<LocalInstanceLogDO> logList = logs.stream().map(x -> { List<LocalInstanceLogDO> logList = logs.stream().map(x -> {
instanceIds.add(x.getInstanceId()); instanceId2LastReportTime.put(x.getInstanceId(), System.currentTimeMillis());
LocalInstanceLogDO y = new LocalInstanceLogDO(); LocalInstanceLogDO y = new LocalInstanceLogDO();
BeanUtils.copyProperties(x, y); BeanUtils.copyProperties(x, y);
@ -107,59 +95,11 @@ public class InstanceLogService {
* @param index 页码 * @param index 页码
* @return 文本字符串 * @return 文本字符串
*/ */
@Transactional(readOnly = true) @Transactional
public StringPage fetchInstanceLog(Long instanceId, long index) { public StringPage fetchInstanceLog(Long instanceId, long index) {
File logFile = new File(genLogFilePath(instanceId));
Lock lock = locks[(int) (instanceId % lockNum)];
lock.lock();
try { try {
long logCount = localInstanceLogRepository.countByInstanceId(instanceId); Future<File> fileFuture = prepareLogFile(instanceId);
File logFile = fileFuture.get(5, TimeUnit.SECONDS);
// 直接从本地数据库构建日志文件
if (logCount != 0) {
// 存在则判断上次更改时间1分钟内有效
if (logFile.exists()) {
long offset = System.currentTimeMillis() - logFile.lastModified();
// 过期才选择重新构建文件
if (offset > EXPIRE_INTERVAL_MS) {
Stream<LocalInstanceLogDO> logStream = localInstanceLogRepository.findByInstanceIdOrderByLogTime(instanceId);
// 这里直接用 Controller 线程执行毕竟本地持久化用不了多少时间因此不用考虑可重入锁的问题
stream2File(logStream, instanceId);
}
}
}else {
if (gridFsTemplate == null) {
return StringPage.simple("There is no local log for this task now, you need to use mongoDB to store the past logs.");
}
// 不存在需要重新下载
if (!logFile.exists()) {
GridFsResource gridFsResource = gridFsTemplate.getResource(genLogFileName(instanceId));
if (!gridFsResource.exists()) {
return StringPage.simple("There is no online log for this job instance.");
}
long targetFileSize = gridFsResource.contentLength();
// 小文件 Controller 线程直接执行大文件则异步下载先返回 downloading...
if (targetFileSize <= SMALL_FILE_MAX_SIZE) {
gridFs2File(gridFsResource, logFile);
}else {
workerPool.execute(() -> gridFs2File(gridFsResource, logFile));
return StringPage.simple("downloading from mongoDB, please retry some time later~");
}
}
}
if (!logFile.exists()) {
return StringPage.simple("There is no online log for this task instance now.");
}
// 分页展示数据 // 分页展示数据
long lines = 0; long lines = 0;
@ -178,18 +118,35 @@ public class InstanceLogService {
} }
}catch (Exception e) { }catch (Exception e) {
log.warn("[InstanceLogService] read logFile from disk failed.", e); log.warn("[InstanceLogService] read logFile from disk failed.", e);
return StringPage.simple("oms-server execution exception, caused by " + ExceptionUtils.getRootCauseMessage(e));
} }
double totalPage = Math.ceil(1.0 * lines / MAX_LINE_COUNT); double totalPage = Math.ceil(1.0 * lines / MAX_LINE_COUNT);
return new StringPage(index, (long) totalPage, sb.toString()); return new StringPage(index, (long) totalPage, sb.toString());
}catch (TimeoutException te) {
return StringPage.simple("log file is being prepared, please try again later.");
}catch (Exception e) { }catch (Exception e) {
log.error("[InstanceLogService] fetchInstanceLog for instance(instanceId={}) failed.", instanceId, e); log.warn("[InstanceLogService] fetchInstanceLog failed for instance(instanceId={}).", instanceId, e);
return StringPage.simple("unknown error from oms-server, please see oms-server's log to find the problem"); return StringPage.simple("oms-server execution exception, caused by " + ExceptionUtils.getRootCauseMessage(e));
}finally {
lock.unlock();
} }
} }
/**
 * Prepares the log file of a task instance asynchronously on the worker pool.
 * An instance that is still present in the last-report-time map is treated as
 * "online log still being updated", so its file is rebuilt from the local database
 * (temporary file); any other instance is served from the stable file.
 * @param instanceId task instance ID
 * @return future that yields the prepared log file
 */
private Future<File> prepareLogFile(long instanceId) {
    // The membership check is evaluated inside the task, i.e. when the pool runs it.
    Callable<File> buildTask = () -> {
        boolean stillReporting = instanceId2LastReportTime.containsKey(instanceId);
        return stillReporting ? genTemporaryLogFile(instanceId) : genStableLogFile(instanceId);
    };
    return workerPool.submit(buildTask);
}
/** /**
* 将本地的任务实例运行日志同步到 mongoDB 存储在任务执行结束后异步执行 * 将本地的任务实例运行日志同步到 mongoDB 存储在任务执行结束后异步执行
* @param instanceId 任务实例ID * @param instanceId 任务实例ID
@ -205,52 +162,90 @@ public class InstanceLogService {
} }
Stopwatch sw = Stopwatch.createStarted(); Stopwatch sw = Stopwatch.createStarted();
if (gridFsTemplate != null) {
File logFile = new File(genLogFilePath(instanceId));
// 先持久化到本地磁盘
try { try {
Stream<LocalInstanceLogDO> allLogStream = localInstanceLogRepository.findByInstanceIdOrderByLogTime(instanceId); // 先持久化到本地文件
stream2File(allLogStream, instanceId); File stableLogFile = genStableLogFile(instanceId);
}catch (Exception e) { // 将文件推送到 MongoDB
log.warn("[InstanceLogService] get log stream failed for instance(instanceId={}).", instanceId, e); if (gridFsTemplate != null) {
} try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(stableLogFile))) {
// 推送到 mongoDB
try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(logFile))) {
InstanceLogMetadata metadata = new InstanceLogMetadata(); InstanceLogMetadata metadata = new InstanceLogMetadata();
metadata.setInstanceId(instanceId); metadata.setInstanceId(instanceId);
metadata.setFileSize(logFile.length()); metadata.setFileSize(stableLogFile.length());
metadata.setCreatedTime(System.currentTimeMillis()); metadata.setCreatedTime(System.currentTimeMillis());
gridFsTemplate.store(bis, genMongoFileName(instanceId), metadata);
gridFsTemplate.store(bis, genLogFileName(instanceId), metadata); log.info("[InstanceLogService] push local instanceLogs(instanceId={}) to mongoDB succeed, using: {}.", instanceId, sw.stop());
}catch (Exception e) { }catch (Exception e) {
log.warn("[InstanceLogService] push local instanceLogs(instanceId={}) to mongoDB failed.", instanceId, e); log.warn("[InstanceLogService] push local instanceLogs(instanceId={}) to mongoDB failed.", instanceId, e);
} }
} }
// 删除本地数据
try {
long total = CommonUtils.executeWithRetry0(() -> localInstanceLogRepository.deleteByInstanceId(instanceId));
instanceIds.remove(instanceId);
log.info("[InstanceLogService] sync local instanceLogs(instanceId={}) to mongoDB succeed, total logs: {},using: {}.", instanceId, total, sw.stop());
}catch (Exception e) { }catch (Exception e) {
log.warn("[InstanceLogService] delete local instanceLogs failed.", e); log.warn("[InstanceLogService] sync local instanceLogs(instanceId={}) failed.", instanceId, e);
}
// 删除本地数据库数据
try {
CommonUtils.executeWithRetry0(() -> localInstanceLogRepository.deleteByInstanceId(instanceId));
instanceId2LastReportTime.remove(instanceId);
}catch (Exception e) {
log.warn("[InstanceLogService] delete local instanceLog(instanceId={}) failed.", instanceId, e);
}
}
/**
 * Returns the temporary log file of an instance, rebuilding it from the local
 * database when it is missing or older than {@code EXPIRE_INTERVAL_MS}.
 * @param instanceId task instance ID
 * @return the temporary log file
 * @throws IOException if writing the file fails
 */
private File genTemporaryLogFile(long instanceId) throws IOException {
    String path = genLogFilePath(instanceId, false);
    // One lock object per instance, obtained through the interned lock-name string.
    synchronized (("tpFileLock-" + instanceId).intern()) {
        File tempLogFile = new File(path);
        // The freshness check has to happen inside the lock as well; otherwise a file
        // that is only half built by another thread could be handed out.
        boolean stale = !tempLogFile.exists()
                || (System.currentTimeMillis() - tempLogFile.lastModified()) >= EXPIRE_INTERVAL_MS;
        if (stale) {
            // Rebuild the file from the locally stored log records.
            try (Stream<LocalInstanceLogDO> localLogs = localInstanceLogRepository.findByInstanceIdOrderByLogTime(instanceId)) {
                stream2File(localLogs, tempLogFile);
            }
        }
        return tempLogFile;
    }
}
private File genStableLogFile(long instanceId) throws IOException {
String path = genLogFilePath(instanceId, true);
synchronized (("stFileLock-" + instanceId).intern()) {
File f = new File(path);
if (f.exists()) {
return f;
}
// 本地存在数据从本地持久化对应 SYNC 的情况
if (instanceId2LastReportTime.containsKey(instanceId)) {
try (Stream<LocalInstanceLogDO> allLogStream = localInstanceLogRepository.findByInstanceIdOrderByLogTime(instanceId)) {
stream2File(allLogStream, f);
}
}else {
if (gridFsTemplate == null) {
FileCopyUtils.copy("SYSTEM: There is no local log for this task now, you need to use mongoDB to store the past logs.".getBytes(), f);
return f;
}
// 否则从 mongoDB 拉取数据对应后期查询的情况
GridFsResource gridFsResource = gridFsTemplate.getResource(genMongoFileName(instanceId));
if (!gridFsResource.exists()) {
FileCopyUtils.copy("SYSTEM: There is no online log for this job instance.".getBytes(), f);
return f;
}
gridFs2File(gridFsResource, f);
}
return f;
} }
} }
/** /**
* 将数据库中存储的日志流转化为磁盘日志文件 * 将数据库中存储的日志流转化为磁盘日志文件
* @param stream * @param stream
* @param instanceId 任务实例ID * @param logFile 目标日志文件
*/ */
private void stream2File(Stream<LocalInstanceLogDO> stream, long instanceId) { private void stream2File(Stream<LocalInstanceLogDO> stream, File logFile) throws IOException {
File logFile = new File(genLogFilePath(instanceId));
if (!logFile.getParentFile().exists()) { if (!logFile.getParentFile().exists()) {
if (!logFile.getParentFile().mkdirs()) { if (!logFile.getParentFile().mkdirs()) {
log.warn("[InstanceLogService] create dir for instanceLog failed, path is {}.", logFile.getPath()); log.warn("[InstanceLogService] create dir for instanceLog failed, path is {}.", logFile.getPath());
@ -264,10 +259,6 @@ public class InstanceLogService {
}catch (Exception ignore) { }catch (Exception ignore) {
} }
}); });
}catch (Exception e) {
log.warn("[InstanceLogService] write instanceLog(instanceId={}) to local file failed.", instanceId, e);
}finally {
stream.close();
} }
} }
@ -276,7 +267,7 @@ public class InstanceLogService {
* @param gridFsResource mongoDB 文件资源 * @param gridFsResource mongoDB 文件资源
* @param logFile 本地文件资源 * @param logFile 本地文件资源
*/ */
private void gridFs2File(GridFsResource gridFsResource, File logFile) { private void gridFs2File(GridFsResource gridFsResource, File logFile) throws IOException {
byte[] buffer = new byte[1024]; byte[] buffer = new byte[1024];
try (BufferedInputStream gis = new BufferedInputStream(gridFsResource.getInputStream()); try (BufferedInputStream gis = new BufferedInputStream(gridFsResource.getInputStream());
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(logFile)) BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(logFile))
@ -285,21 +276,17 @@ public class InstanceLogService {
bos.write(buffer); bos.write(buffer);
} }
bos.flush(); bos.flush();
}catch (Exception e) {
log.warn("[InstanceLogService] download instanceLog to local file({}) failed.", logFile.getName(), e);
} }
} }
/** /**
* 拼接日志 -> 2020-04-29 22:07:10.059 192.168.1.1:2777 INFO XXX * 拼接日志 -> 2020-04-29 22:07:10.059 192.168.1.1:2777 INFO XXX
* @param instanceLog 日志对象 * @param instanceLog 日志对象
* @return 字符串 * @return 字符串
*/ */
private static String convertLog(LocalInstanceLogDO instanceLog) { private static String convertLog(LocalInstanceLogDO instanceLog) {
String pattern = "%s [%s] -%s"; return String.format("%s [%s] -%s", dateFormat.format(instanceLog.getLogTime()), instanceLog.getWorkerAddress(), instanceLog.getLogContent());
return String.format(pattern, dateFormat.format(instanceLog.getLogTime()), instanceLog.getLogContent());
} }
@ -309,7 +296,7 @@ public class InstanceLogService {
// 1. 定时删除秒级任务的日志 // 1. 定时删除秒级任务的日志
List<Long> frequentInstanceIds = Lists.newLinkedList(); List<Long> frequentInstanceIds = Lists.newLinkedList();
instanceIds.forEach(instanceId -> { instanceId2LastReportTime.keySet().forEach(instanceId -> {
JobInfoDO jobInfo = InstanceManager.fetchJobInfo(instanceId); JobInfoDO jobInfo = InstanceManager.fetchJobInfo(instanceId);
if (jobInfo == null) { if (jobInfo == null) {
return; return;
@ -331,13 +318,20 @@ public class InstanceLogService {
} }
}); });
} }
// 2. 删除长时间未 REPORT 的日志
} }
private static String genLogFileName(long instanceId) {
return String.format("%d.log", instanceId); private static String genLogFilePath(long instanceId, boolean stable) {
if (stable) {
return USER_HOME + "/oms/online_log/" + String.format("%d-stable.log", instanceId);
}else {
return USER_HOME + "/oms/online_log/" + String.format("%d-temporary.log", instanceId);
} }
private static String genLogFilePath(long instanceId) { }
return USER_HOME + "/oms/online_log/" + genLogFileName(instanceId); private static String genMongoFileName(long instanceId) {
return String.format("oms-%d.log", instanceId);
} }
@Autowired(required = false) @Autowired(required = false)