mirror of
https://github.com/actiontech/dble.git
synced 2026-05-03 13:00:17 -05:00
Merge branch 'master' into inner-2234&2336
This commit is contained in:
@@ -70,7 +70,20 @@
|
||||
<DefaultRolloverStrategy max="10"/>
|
||||
</RollingFile>
|
||||
-->
|
||||
<RollingFile name="ThreadChecker" fileName="logs/thread.log"
|
||||
filePattern="logs/$${date:yyyy-MM}/thread-%d{MM-dd}-%i.log.gz">
|
||||
<PatternLayout>
|
||||
<Pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %5p [%t] (%l) - %m%n</Pattern>
|
||||
</PatternLayout>
|
||||
<Policies>
|
||||
<OnStartupTriggeringPolicy/>
|
||||
<SizeBasedTriggeringPolicy size="250 MB"/>
|
||||
<TimeBasedTriggeringPolicy/>
|
||||
</Policies>
|
||||
<DefaultRolloverStrategy max="10"/>
|
||||
</RollingFile>
|
||||
</Appenders>
|
||||
|
||||
<Loggers>
|
||||
<!-- independent log file for new ha interface, for use useOuterHa only
|
||||
<Logger name="ha_log" additivity="false" includeLocation="false" >
|
||||
@@ -87,7 +100,9 @@
|
||||
<!-- <AppenderRef ref="DumpFileLog" />-->
|
||||
<!-- <AppenderRef ref="RollingFile"/>-->
|
||||
<!-- </Logger>-->
|
||||
|
||||
<Logger name="ThreadChecker" additivity="false" includeLocation="false">
|
||||
<AppenderRef ref="ThreadChecker"/>
|
||||
</Logger>
|
||||
<asyncRoot level="info" includeLocation="true">
|
||||
<!--<AppenderRef ref="Console" />-->
|
||||
<AppenderRef ref="RollingFile"/>
|
||||
|
||||
@@ -45,6 +45,7 @@ import com.actiontech.dble.services.manager.information.ManagerSchemaInfo;
|
||||
import com.actiontech.dble.singleton.*;
|
||||
import com.actiontech.dble.statistic.sql.StatisticManager;
|
||||
import com.actiontech.dble.statistic.stat.ThreadWorkUsage;
|
||||
import com.actiontech.dble.singleton.ThreadChecker;
|
||||
import com.actiontech.dble.util.ExecutorUtil;
|
||||
import com.actiontech.dble.util.TimeUtil;
|
||||
import com.google.common.collect.Maps;
|
||||
@@ -293,6 +294,7 @@ public final class DbleServer {
|
||||
LOGGER.info(server.getName() + " is started and listening on " + server.getPort());
|
||||
LOGGER.info("=====================================Server started success=======================================");
|
||||
|
||||
ThreadCheckerScheduler.getInstance().init();
|
||||
Scheduler.getInstance().init(timerExecutor);
|
||||
LOGGER.info("=======================================Scheduler started==========================================");
|
||||
|
||||
@@ -337,7 +339,7 @@ public final class DbleServer {
|
||||
backendExecutor = ExecutorUtil.createFixed(BACKEND_WORKER_NAME, SystemConfig.getInstance().getBackendWorker(), runnableMap);
|
||||
writeToBackendExecutor = ExecutorUtil.createFixed(WRITE_TO_BACKEND_WORKER_NAME, SystemConfig.getInstance().getWriteToBackendWorker(), runnableMap);
|
||||
complexQueryExecutor = ExecutorUtil.createCached(COMPLEX_QUERY_EXECUTOR_NAME, SystemConfig.getInstance().getComplexQueryWorker(), null);
|
||||
timerExecutor = ExecutorUtil.createFixed(TIMER_WORKER_NAME, 1);
|
||||
timerExecutor = ExecutorUtil.createCached(TIMER_WORKER_NAME, 1, 2, ThreadChecker.getInstance());
|
||||
nioFrontExecutor = ExecutorUtil.createFixed(NIO_FRONT_RW, frontProcessorCount, runnableMap);
|
||||
nioBackendExecutor = ExecutorUtil.createFixed(NIO_BACKEND_RW, backendProcessorCount, runnableMap);
|
||||
}
|
||||
@@ -459,30 +461,34 @@ public final class DbleServer {
|
||||
return new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
for (IOProcessor p : backendProcessors) {
|
||||
p.checkBackendCons();
|
||||
try {
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
for (IOProcessor p : backendProcessors) {
|
||||
p.checkBackendCons();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOGGER.info("checkBackendCons caught err:", e);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOGGER.info("checkBackendCons caught err:", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
for (IOProcessor p : frontProcessors) {
|
||||
p.checkFrontCons();
|
||||
});
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
for (IOProcessor p : frontProcessors) {
|
||||
p.checkFrontCons();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOGGER.info("checkFrontCons caught err:", e);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOGGER.info("checkFrontCons caught err:", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
} catch (RejectedExecutionException e) {
|
||||
ThreadChecker.getInstance().timerExecuteError(e, "processorCheck()");
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -45,4 +45,5 @@ public final class AlarmCode {
|
||||
public static final String DB_MASTER_INSTANCE_DELAY_FAIL = "DB_MASTER_INSTANCE_DELAY_FAIL";
|
||||
public static final String SLOW_QUERY_QUEUE_POLICY_ABORT = "SLOW_QUERY_QUEUE_POLICY_ABORT";
|
||||
public static final String SLOW_QUERY_QUEUE_POLICY_WAIT = "SLOW_QUERY_QUEUE_POLICY_WAIT";
|
||||
public static final String THREAD_SUSPECTED_HANG = "DBLE_THREAD_SUSPECTED_HANG"; //Resolve by trigger
|
||||
}
|
||||
|
||||
@@ -24,4 +24,5 @@ public final class ToResolveContainer {
|
||||
public static final Set<String> XA_WRITE_CHECK_POINT_FAIL = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
|
||||
public static final Set<String> DB_INSTANCE_LOWER_CASE_ERROR = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
|
||||
public static final Set<String> DB_SLAVE_INSTANCE_DELAY = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
|
||||
public static final Set<String> THREAD_SUSPECTED_HANG = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
|
||||
}
|
||||
|
||||
@@ -42,6 +42,9 @@ public class RWSplitNonBlockingSession extends Session {
|
||||
public static final Logger LOGGER = LoggerFactory.getLogger(RWSplitNonBlockingSession.class);
|
||||
|
||||
private volatile BackendConnection conn;
|
||||
|
||||
//previous preserve conn
|
||||
private volatile BackendConnection backupConn;
|
||||
private final RWSplitService rwSplitService;
|
||||
private PhysicalDbGroup rwGroup;
|
||||
|
||||
@@ -314,9 +317,20 @@ public class RWSplitNonBlockingSession extends Session {
|
||||
if (LOGGER.isDebugEnabled()) {
|
||||
LOGGER.debug("route sql {} to {}", sql, dbInstance);
|
||||
}
|
||||
if (rwSplitService.isInTransaction()) {
|
||||
if (conn != null) {
|
||||
backupConn = conn;
|
||||
conn = null;
|
||||
}
|
||||
rwSplitService.setForceUseAutoCommit(true);
|
||||
}
|
||||
RWSplitHandler handler = new RWSplitHandler(rwSplitService, false, null, callback);
|
||||
dbInstance.getConnection(rwSplitService.getSchema(), handler, null, false);
|
||||
} catch (Exception e) {
|
||||
if (rwSplitService.isForceUseAutoCommit()) {
|
||||
conn = backupConn;
|
||||
backupConn = null;
|
||||
}
|
||||
rwSplitService.executeException(e, sql);
|
||||
}
|
||||
}
|
||||
@@ -350,8 +364,11 @@ public class RWSplitNonBlockingSession extends Session {
|
||||
|
||||
public void unbindIfSafe() {
|
||||
final BackendConnection tmp = conn;
|
||||
if (tmp != null && rwSplitService.isKeepBackendConn()) {
|
||||
if (tmp != null && !rwSplitService.isKeepBackendConn()) {
|
||||
this.conn = null;
|
||||
conn = backupConn;
|
||||
backupConn = null;
|
||||
|
||||
if (rwSplitService.isFlowControlled()) {
|
||||
releaseConnectionFromFlowControlled(tmp);
|
||||
}
|
||||
@@ -363,16 +380,28 @@ public class RWSplitNonBlockingSession extends Session {
|
||||
|
||||
public void unbind() {
|
||||
this.conn = null;
|
||||
conn = backupConn;
|
||||
backupConn = null;
|
||||
}
|
||||
|
||||
public void close(String reason) {
|
||||
if (null != rwGroup) {
|
||||
rwGroup.unBindRwSplitSession(this);
|
||||
}
|
||||
final BackendConnection tmp = this.conn;
|
||||
this.conn = null;
|
||||
if (tmp != null) {
|
||||
tmp.close(reason);
|
||||
{
|
||||
final BackendConnection tmp = this.conn;
|
||||
this.conn = null;
|
||||
if (tmp != null) {
|
||||
tmp.close(reason);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
final BackendConnection tmp = this.backupConn;
|
||||
this.backupConn = null;
|
||||
if (tmp != null) {
|
||||
tmp.close(reason);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -237,9 +237,9 @@ public final class ShowHelp {
|
||||
|
||||
HELPS.put("show @@statistic", "Turn off statistic information");
|
||||
HELPS.put("enable @@statistic", "Turn on statistic sql");
|
||||
HELPS.put("enable @@enableStatisticAnalysis", "Turn on statistic analysis sql('show @@sql.sum.user/table' or 'show @@sql.condition')");
|
||||
HELPS.put("enable @@statisticAnalysis", "Turn on statistic analysis sql('show @@sql.sum.user/table' or 'show @@sql.condition')");
|
||||
HELPS.put("disable @@statistic", "Turn off statistic sql");
|
||||
HELPS.put("disable @@enableStatisticAnalysis", "Turn off statistic analysis sql('show @@sql.sum.user/table' or 'show @@sql.condition')");
|
||||
HELPS.put("disable @@statisticAnalysis", "Turn off statistic analysis sql('show @@sql.sum.user/table' or 'show @@sql.condition')");
|
||||
HELPS.put("reload @@statistic_table_size = ? [where table='?' | where table in (dble_information.tableA,...)]", "Statistic table size");
|
||||
HELPS.put("reload @@samplingRate=?", "Reset the samplingRate size");
|
||||
HELPS.put("show @@statistic_queue.usage", "Show the queue usage");
|
||||
|
||||
@@ -125,12 +125,15 @@ public class MySQLResponseService extends BackendService {
|
||||
|
||||
write(originPacket, WriteFlags.QUERY_END);
|
||||
}
|
||||
|
||||
public void execute(BusinessService service, String sql) {
|
||||
execute(service, sql, false);
|
||||
}
|
||||
|
||||
public void execute(BusinessService service, String sql, boolean forceUseAutoCommit) {
|
||||
boolean changeUser = isChangeUser(service);
|
||||
if (changeUser) return;
|
||||
|
||||
StringBuilder synSQL = getSynSql(null, null, service.isAutocommit(), service);
|
||||
StringBuilder synSQL = getSynSql(null, null, forceUseAutoCommit || service.isAutocommit(), service);
|
||||
if (protocolResponseHandler != defaultResponseHandler) {
|
||||
protocolResponseHandler = defaultResponseHandler;
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ public class RWSplitHandler implements ResponseHandler, LoadDataResponseHandler,
|
||||
mysqlService.execute(rwSplitService, originPacket);
|
||||
} else if (!StringUtil.isEmpty(executeSql)) {
|
||||
// such as: Hint sql (remove comment sentences)
|
||||
mysqlService.execute(rwSplitService, executeSql);
|
||||
mysqlService.execute(rwSplitService, executeSql, rwSplitService.isForceUseAutoCommit());
|
||||
} else {
|
||||
// not happen
|
||||
mysqlService.execute(rwSplitService, rwSplitService.getExecuteSqlBytes());
|
||||
|
||||
@@ -77,6 +77,8 @@ public class RWSplitService extends BusinessService<SingleDbGroupUserConfig> {
|
||||
|
||||
// prepare statement
|
||||
private ConcurrentHashMap<Long, PreparedStatementHolder> psHolder = new ConcurrentHashMap<>();
|
||||
private boolean forceUseAutoCommit = false;
|
||||
|
||||
|
||||
public RWSplitService(AbstractConnection connection, AuthResultInfo info) {
|
||||
super(connection, info);
|
||||
@@ -116,6 +118,13 @@ public class RWSplitService extends BusinessService<SingleDbGroupUserConfig> {
|
||||
session.trace(t -> t.setRequestTime());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean beforeHandlingTask(@NotNull ServiceTask task) {
|
||||
//initialize value for the REQUEST
|
||||
setForceUseAutoCommit(false);
|
||||
return super.beforeHandlingTask(task);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void handleInnerData(byte[] data) {
|
||||
session.trace(t -> t.startProcess());
|
||||
@@ -400,7 +409,8 @@ public class RWSplitService extends BusinessService<SingleDbGroupUserConfig> {
|
||||
}
|
||||
|
||||
public boolean isKeepBackendConn() {
|
||||
return !isInTransaction() && !isInLoadData() && psHolder.isEmpty() && !isLockTable() && !isUsingTmpTable() && nameSet.isEmpty();
|
||||
boolean releaseConn = (!isInTransaction() || isForceUseAutoCommit()) && !isInLoadData() && psHolder.isEmpty() && !isLockTable() && !isUsingTmpTable() && nameSet.isEmpty();
|
||||
return !releaseConn;
|
||||
}
|
||||
|
||||
public boolean isInitDb() {
|
||||
@@ -423,6 +433,14 @@ public class RWSplitService extends BusinessService<SingleDbGroupUserConfig> {
|
||||
this.multiHandler = multiHandler;
|
||||
}
|
||||
|
||||
public boolean isForceUseAutoCommit() {
|
||||
return forceUseAutoCommit;
|
||||
}
|
||||
|
||||
public void setForceUseAutoCommit(boolean forceUseAutoCommit) {
|
||||
this.forceUseAutoCommit = forceUseAutoCommit;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void killAndClose(String reason) {
|
||||
session.close(reason);
|
||||
|
||||
@@ -18,10 +18,7 @@ import com.actiontech.dble.util.TimeUtil;
|
||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
import static com.actiontech.dble.server.NonBlockingSession.LOGGER;
|
||||
|
||||
@@ -40,7 +37,7 @@ public final class Scheduler {
|
||||
private ScheduledExecutorService scheduledExecutor;
|
||||
|
||||
private Scheduler() {
|
||||
this.scheduledExecutor = Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setNameFormat("TimerScheduler-%d").build());
|
||||
this.scheduledExecutor = Executors.newScheduledThreadPool(2, new ThreadFactoryBuilder().setNameFormat("TimerScheduler-%d").build());
|
||||
}
|
||||
|
||||
public void init(ExecutorService executor) {
|
||||
@@ -84,26 +81,29 @@ public final class Scheduler {
|
||||
return new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
|
||||
long sqlTimeout = SystemConfig.getInstance().getSqlExecuteTimeout() * 1000L;
|
||||
//close connection if now -lastTime>sqlExecuteTimeout
|
||||
long currentTime = TimeUtil.currentTimeMillis();
|
||||
Iterator<PooledConnection> iterator = IOProcessor.BACKENDS_OLD.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
PooledConnection con = iterator.next();
|
||||
long lastTime = con.getLastTime();
|
||||
if (con.isClosed() || con.getState() != PooledConnection.STATE_IN_USE || currentTime - lastTime > sqlTimeout) {
|
||||
con.close("clear old backend connection ...");
|
||||
iterator.remove();
|
||||
long sqlTimeout = SystemConfig.getInstance().getSqlExecuteTimeout() * 1000L;
|
||||
//close connection if now -lastTime>sqlExecuteTimeout
|
||||
long currentTime = TimeUtil.currentTimeMillis();
|
||||
Iterator<PooledConnection> iterator = IOProcessor.BACKENDS_OLD.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
PooledConnection con = iterator.next();
|
||||
long lastTime = con.getLastTime();
|
||||
if (con.isClosed() || con.getState() != PooledConnection.STATE_IN_USE || currentTime - lastTime > sqlTimeout) {
|
||||
con.close("clear old backend connection ...");
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
} catch (RejectedExecutionException e) {
|
||||
ThreadChecker.getInstance().timerExecuteError(e, "dbInstanceOldConsClear()");
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
@@ -111,35 +111,53 @@ public final class Scheduler {
|
||||
* after reload @@config_all ,clean old dbGroup
|
||||
*/
|
||||
private Runnable oldDbGroupClear() {
|
||||
return () -> timerExecutor.execute(() -> {
|
||||
Iterator<PhysicalDbGroup> iterator = IOProcessor.BACKENDS_OLD_GROUP.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
PhysicalDbGroup dbGroup = iterator.next();
|
||||
boolean isStop = dbGroup.stopOfBackground("[background task]reload config, recycle old group");
|
||||
LOGGER.info("[background task]recycle old group:{},result:{}", dbGroup.getGroupName(), isStop);
|
||||
if (isStop) {
|
||||
iterator.remove();
|
||||
return new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
timerExecutor.execute(() -> {
|
||||
Iterator<PhysicalDbGroup> iterator = IOProcessor.BACKENDS_OLD_GROUP.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
PhysicalDbGroup dbGroup = iterator.next();
|
||||
boolean isStop = dbGroup.stopOfBackground("[background task]reload config, recycle old group");
|
||||
LOGGER.info("[background task]recycle old group:{},result:{}", dbGroup.getGroupName(), isStop);
|
||||
if (isStop) {
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
});
|
||||
} catch (RejectedExecutionException e) {
|
||||
ThreadChecker.getInstance().timerExecuteError(e, "oldDbGroupClear()");
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* after reload @@config_all ,clean old dbInstance
|
||||
*/
|
||||
private Runnable oldDbInstanceClear() {
|
||||
return () -> timerExecutor.execute(() -> {
|
||||
Iterator<PhysicalDbInstance> iterator = IOProcessor.BACKENDS_OLD_INSTANCE.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
PhysicalDbInstance dbInstance = iterator.next();
|
||||
boolean isStop = dbInstance.stopOfBackground("[background task]reload config, recycle old dbInstance");
|
||||
LOGGER.info("[background task]recycle old dbInstance:{},result:{}", dbInstance, isStop);
|
||||
if (isStop) {
|
||||
iterator.remove();
|
||||
dbInstance.getDbGroup().setState(PhysicalDbGroup.INITIAL);
|
||||
return new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
timerExecutor.execute(() -> {
|
||||
Iterator<PhysicalDbInstance> iterator = IOProcessor.BACKENDS_OLD_INSTANCE.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
PhysicalDbInstance dbInstance = iterator.next();
|
||||
boolean isStop = dbInstance.stopOfBackground("[background task]reload config, recycle old dbInstance");
|
||||
LOGGER.info("[background task]recycle old dbInstance:{},result:{}", dbInstance, isStop);
|
||||
if (isStop) {
|
||||
iterator.remove();
|
||||
dbInstance.getDbGroup().setState(PhysicalDbGroup.INITIAL);
|
||||
}
|
||||
}
|
||||
});
|
||||
} catch (RejectedExecutionException e) {
|
||||
ThreadChecker.getInstance().timerExecuteError(e, "oldDbInstanceClear()");
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -148,12 +166,16 @@ public final class Scheduler {
|
||||
return new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
XASessionCheck.getInstance().checkSessions();
|
||||
}
|
||||
});
|
||||
try {
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
XASessionCheck.getInstance().checkSessions();
|
||||
}
|
||||
});
|
||||
} catch (RejectedExecutionException e) {
|
||||
ThreadChecker.getInstance().timerExecuteError(e, "xaSessionCheck()");
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -162,12 +184,16 @@ public final class Scheduler {
|
||||
return new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
XAStateLog.cleanCompleteRecoveryLog();
|
||||
}
|
||||
});
|
||||
try {
|
||||
timerExecutor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
XAStateLog.cleanCompleteRecoveryLog();
|
||||
}
|
||||
});
|
||||
} catch (RejectedExecutionException e) {
|
||||
ThreadChecker.getInstance().timerExecuteError(e, "xaLogClean()");
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -0,0 +1,234 @@
|
||||
package com.actiontech.dble.singleton;
|
||||
|
||||
import com.actiontech.dble.DbleServer;
|
||||
import com.actiontech.dble.alarm.AlarmCode;
|
||||
import com.actiontech.dble.alarm.Alert;
|
||||
import com.actiontech.dble.alarm.AlertUtil;
|
||||
import com.actiontech.dble.alarm.ToResolveContainer;
|
||||
import com.actiontech.dble.util.NameableExecutor;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.RejectedExecutionException;
|
||||
|
||||
import static com.actiontech.dble.DbleServer.TIMER_WORKER_NAME;
|
||||
|
||||
public class ThreadChecker {
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger("ThreadChecker");
|
||||
private static long checkTimeNs = 10 * 1000 * 1000000L; // 10s
|
||||
|
||||
private static final ThreadChecker INSTANCE = new ThreadChecker();
|
||||
public final Map<Thread, TimeRecord> timeRecords = new ConcurrentHashMap<>(4);
|
||||
|
||||
public void startExec(Thread t) {
|
||||
long ns = System.nanoTime();
|
||||
TimeRecord tr = timeRecords.get(t);
|
||||
if (tr == null) {
|
||||
tr = new TimeRecord();
|
||||
timeRecords.put(t, tr);
|
||||
}
|
||||
tr.setStartTime(ns);
|
||||
tr.setEndTime(0);
|
||||
}
|
||||
|
||||
public void endExec() {
|
||||
TimeRecord tr = timeRecords.get(Thread.currentThread());
|
||||
long ns = System.nanoTime();
|
||||
if (tr != null) {
|
||||
tr.setEndTime(ns);
|
||||
}
|
||||
}
|
||||
|
||||
public void terminated() {
|
||||
timeRecords.remove(Thread.currentThread());
|
||||
}
|
||||
|
||||
public void doSelfCheck() {
|
||||
// check single thread
|
||||
for (Map.Entry<Thread, TimeRecord> entry : timeRecords.entrySet()) {
|
||||
doSelfCheck0(entry.getKey(), entry.getValue());
|
||||
}
|
||||
tryResolveThreadPoolAlarm();
|
||||
}
|
||||
|
||||
private void doSelfCheck0(Thread th, TimeRecord tr) {
|
||||
String threadName = th.getName();
|
||||
LastRecordInfo current = getInfo(threadName, th, tr.getStartTime(), tr.getEndTime());
|
||||
if (current == null) return;
|
||||
if (tr.getLastInfo() == null) {
|
||||
tr.setLastInfo(current);
|
||||
} else {
|
||||
LastRecordInfo previous = tr.getLastInfo();
|
||||
tr.setLastInfo(current);
|
||||
boolean isSuspected = false;
|
||||
String key = current.getName();
|
||||
// diff
|
||||
if (current.getEndTime() == 0 && previous.getEndTime() == 0 && current.getStartTime() == previous.getStartTime()) {
|
||||
long nowTime = System.nanoTime();
|
||||
long timeDiff = nowTime - current.getStartTime();
|
||||
if (timeDiff > checkTimeNs) { // more than 10s will log
|
||||
String msg = "Thread[" + key + "] suspected hang, execute time:[{" + timeDiff / 1000000L + "ms}] more than 10s, currentState:[" + current.getState() + "]";
|
||||
LOGGER.info(msg + ", stackTrace: {}", getStackTrace(key));
|
||||
// if there is task accumulation in the queue, it means that all threads are hang
|
||||
if (previous.getCompletedTask() == current.getCompletedTask() && current.getActiveTaskCount() == previous.getActiveTaskCount()) {
|
||||
LOGGER.info("The thread pool where the thread[" + key + "] is located is in the hang state and cannot work. Trigger alarm");
|
||||
isSuspected = true;
|
||||
Map<String, String> labels = AlertUtil.genSingleLabel("thread", key);
|
||||
AlertUtil.alertSelf(AlarmCode.THREAD_SUSPECTED_HANG, Alert.AlertLevel.WARN, msg, labels);
|
||||
ToResolveContainer.THREAD_SUSPECTED_HANG.add(key);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!isSuspected && ToResolveContainer.THREAD_SUSPECTED_HANG.contains(key)) {
|
||||
LOGGER.info("Resolve Thread[" + key + "]'s alarm");
|
||||
AlertUtil.alertSelfResolve(AlarmCode.THREAD_SUSPECTED_HANG, Alert.AlertLevel.WARN, AlertUtil.genSingleLabel("thread", key),
|
||||
ToResolveContainer.THREAD_SUSPECTED_HANG, key);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void tryResolveThreadPoolAlarm() {
|
||||
NameableExecutor timerExecutor = (NameableExecutor) DbleServer.getInstance().getTimerExecutor();
|
||||
String key = timerExecutor.getName();
|
||||
if (!ToResolveContainer.THREAD_SUSPECTED_HANG.contains(key))
|
||||
return;
|
||||
if (!timerExecutor.isShutdown() && !timerExecutor.isTerminated() &&
|
||||
timerExecutor.getQueue().size() != 1024 && timerExecutor.getPoolSize() != timerExecutor.getActiveCount()) {
|
||||
LOGGER.info("Resolve ThreadPool[" + key + "]'s alarm");
|
||||
AlertUtil.alertSelfResolve(AlarmCode.THREAD_SUSPECTED_HANG, Alert.AlertLevel.WARN, AlertUtil.genSingleLabel("thread", key),
|
||||
ToResolveContainer.THREAD_SUSPECTED_HANG, key);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
public void timerExecuteError(RejectedExecutionException exception, String method) {
|
||||
NameableExecutor timerExecutor = (NameableExecutor) DbleServer.getInstance().getTimerExecutor();
|
||||
String key = timerExecutor.getName();
|
||||
if (!ToResolveContainer.THREAD_SUSPECTED_HANG.contains(key)) { // only one
|
||||
String msg = "ThreadPool[" + TIMER_WORKER_NAME + "] execute fail, isShutDown[" + timerExecutor.
|
||||
isShutdown() + "], task_queue_size[" + timerExecutor.getQueue().size() + "]";
|
||||
|
||||
LOGGER.info(msg + ", happened at '{}' method, exception:{}", method, exception);
|
||||
LOGGER.info("Trigger ThreadPool[" + key + "]'s alarm");
|
||||
|
||||
Map<String, String> labels = AlertUtil.genSingleLabel("thread", key);
|
||||
AlertUtil.alertSelf(AlarmCode.THREAD_SUSPECTED_HANG, Alert.AlertLevel.WARN, msg + ", For details, see logs/thread.log", labels);
|
||||
ToResolveContainer.THREAD_SUSPECTED_HANG.add(key);
|
||||
}
|
||||
}
|
||||
|
||||
private String getStackTrace(String threadName) {
|
||||
for (Map.Entry<Thread, TimeRecord> entry : timeRecords.entrySet()) {
|
||||
StackTraceElement[] st = entry.getKey().getStackTrace();
|
||||
if (threadName.equals(entry.getKey().getName())) {
|
||||
StringBuilder sbf = new StringBuilder();
|
||||
for (StackTraceElement e : st) {
|
||||
sbf.append("\n\tat ");
|
||||
sbf.append(e);
|
||||
}
|
||||
return sbf.toString();
|
||||
}
|
||||
}
|
||||
return "empty";
|
||||
}
|
||||
|
||||
private LastRecordInfo getInfo(String name, Thread th, long lastExecTime, long lastFinishTime) {
|
||||
String[] arr = name.split("-");
|
||||
if (arr.length == 2) {
|
||||
switch (arr[1]) {
|
||||
case TIMER_WORKER_NAME:
|
||||
NameableExecutor exec = (NameableExecutor) DbleServer.getInstance().getTimerExecutor();
|
||||
return new LastRecordInfo(name, th.getState(), lastExecTime, lastFinishTime, exec.getActiveCount(), exec.getQueue().size(), exec.getCompletedTaskCount());
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static ThreadChecker getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
static class TimeRecord {
|
||||
volatile long startTime;
|
||||
volatile long endTime;
|
||||
volatile LastRecordInfo lastInfo;
|
||||
|
||||
public long getStartTime() {
|
||||
return startTime;
|
||||
}
|
||||
|
||||
public void setStartTime(long startTime) {
|
||||
this.startTime = startTime;
|
||||
}
|
||||
|
||||
public long getEndTime() {
|
||||
return endTime;
|
||||
}
|
||||
|
||||
public void setEndTime(long endTime) {
|
||||
this.endTime = endTime;
|
||||
}
|
||||
|
||||
public LastRecordInfo getLastInfo() {
|
||||
return lastInfo;
|
||||
}
|
||||
|
||||
public void setLastInfo(LastRecordInfo lastInfo) {
|
||||
this.lastInfo = lastInfo;
|
||||
}
|
||||
}
|
||||
|
||||
static class LastRecordInfo {
|
||||
String name;
|
||||
long startTime;
|
||||
long endTime;
|
||||
long activeTaskCount;
|
||||
long taskQueueSize;
|
||||
long completedTask;
|
||||
Thread.State state;
|
||||
|
||||
LastRecordInfo(String name, Thread.State state, long startTime, long endTime, long activeTaskCount, long taskQueueSize, long completedTask) {
|
||||
this.name = name;
|
||||
this.state = state;
|
||||
this.startTime = startTime;
|
||||
this.endTime = endTime;
|
||||
this.activeTaskCount = activeTaskCount;
|
||||
this.taskQueueSize = taskQueueSize;
|
||||
this.completedTask = completedTask;
|
||||
}
|
||||
|
||||
public Thread.State getState() {
|
||||
return state;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public long getStartTime() {
|
||||
return startTime;
|
||||
}
|
||||
|
||||
public long getEndTime() {
|
||||
return endTime;
|
||||
}
|
||||
|
||||
public long getActiveTaskCount() {
|
||||
return activeTaskCount;
|
||||
}
|
||||
|
||||
public long getTaskQueueSize() {
|
||||
return taskQueueSize;
|
||||
}
|
||||
|
||||
public long getCompletedTask() {
|
||||
return completedTask;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
package com.actiontech.dble.singleton;
|
||||
|
||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class ThreadCheckerScheduler {
|
||||
public static final Logger LOGGER = LoggerFactory.getLogger(ThreadCheckerScheduler.class);
|
||||
private static final ThreadCheckerScheduler INSTANCE = new ThreadCheckerScheduler();
|
||||
private ScheduledExecutorService scheduledExecutor;
|
||||
|
||||
public ThreadCheckerScheduler() {
|
||||
this.scheduledExecutor = Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setNameFormat("ThreadChecker-%d").
|
||||
setUncaughtExceptionHandler((Thread threads, Throwable e) -> LOGGER.warn("unknown exception ", e)).build());
|
||||
}
|
||||
|
||||
public void init() {
|
||||
scheduledExecutor.scheduleAtFixedRate(checkThread(), 0L, 2, TimeUnit.MINUTES);
|
||||
}
|
||||
|
||||
private Runnable checkThread() {
|
||||
return new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
ThreadChecker.getInstance().doSelfCheck();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static ThreadCheckerScheduler getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
}
|
||||
@@ -229,7 +229,7 @@ public class SqlStatisticHandler implements StatisticDataHandler {
|
||||
}
|
||||
|
||||
public String getSqlDigest() {
|
||||
if (init.compareAndSet(false, true)) {
|
||||
if (init.compareAndSet(false, true) || this.sqlDigest == null) {
|
||||
try {
|
||||
if (stmt.equalsIgnoreCase("begin")) {
|
||||
this.sqlDigest = "begin";
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*/
|
||||
package com.actiontech.dble.util;
|
||||
|
||||
import com.actiontech.dble.singleton.ThreadChecker;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.SynchronousQueue;
|
||||
@@ -20,6 +21,10 @@ public final class ExecutorUtil {
|
||||
return createFixed(name, null, size, true, null);
|
||||
}
|
||||
|
||||
public static NameableExecutor createFixed(String name, int size, ThreadChecker checker) {
|
||||
return createFixed(name, null, size, true, null, checker);
|
||||
}
|
||||
|
||||
public static NameableExecutor createFixed(String name, int size, Map<String, Map<Thread, Runnable>> runnableMap) {
|
||||
return createFixed(name, null, size, true, runnableMap);
|
||||
}
|
||||
@@ -30,7 +35,12 @@ public final class ExecutorUtil {
|
||||
|
||||
private static NameableExecutor createFixed(String namePrefix, String nameSuffix, int size, boolean isDaemon, Map<String, Map<Thread, Runnable>> runnableMap) {
|
||||
NameableThreadFactory factory = new NameableThreadFactory(namePrefix, nameSuffix, isDaemon);
|
||||
return new NameableExecutor(namePrefix, size, size, Long.MAX_VALUE, new LinkedBlockingQueue<>(), factory, runnableMap);
|
||||
return new NameableExecutor(namePrefix, size, size, Long.MAX_VALUE, new LinkedBlockingQueue<>(), factory, runnableMap, null);
|
||||
}
|
||||
|
||||
private static NameableExecutor createFixed(String namePrefix, String nameSuffix, int size, boolean isDaemon, Map<String, Map<Thread, Runnable>> runnableMap, ThreadChecker checker) {
|
||||
NameableThreadFactory factory = new NameableThreadFactory(namePrefix, nameSuffix, isDaemon);
|
||||
return new NameableExecutor(namePrefix, size, size, Long.MAX_VALUE, new LinkedBlockingQueue<>(), factory, runnableMap, checker);
|
||||
}
|
||||
|
||||
public static NameableExecutor createCached(String name, int size) {
|
||||
@@ -43,6 +53,18 @@ public final class ExecutorUtil {
|
||||
|
||||
private static NameableExecutor createCached(String name, int size, boolean isDaemon, Map<String, Map<Thread, Runnable>> runnableMap) {
|
||||
NameableThreadFactory factory = new NameableThreadFactory(name, isDaemon);
|
||||
return new NameableExecutor(name, size, Integer.MAX_VALUE, 60, new SynchronousQueue<>(), factory, runnableMap);
|
||||
return new NameableExecutor(name, size, Integer.MAX_VALUE, 60, new SynchronousQueue<>(), factory, runnableMap, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* When the size number of threads takes a long time to execute or hangs, and the queue is full,
|
||||
* create a new thread to continue working, and the number of newly created threads does not exceed maxSize
|
||||
* <p>
|
||||
* If maxSize threads are all hanging, only 1024 tasks will be stored in the queue (this avoids memory leaks),
|
||||
* and subsequent tasks will be discarded by default
|
||||
*/
|
||||
public static NameableExecutor createCached(String name, int size, int maxSize, ThreadChecker checker) {
|
||||
NameableThreadFactory factory = new NameableThreadFactory(name, true);
|
||||
return new NameableExecutor(name, size, maxSize, 60, new LinkedBlockingQueue<>(256), factory, null, checker);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,10 +5,13 @@
|
||||
*/
|
||||
package com.actiontech.dble.util;
|
||||
|
||||
import com.actiontech.dble.singleton.ThreadChecker;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* @author mycat
|
||||
@@ -18,22 +21,26 @@ public class NameableExecutor extends ThreadPoolExecutor {
|
||||
protected String name;
|
||||
|
||||
private Map<String, Map<Thread, Runnable>> runnableMap;
|
||||
private ThreadChecker threadChecker = null;
|
||||
|
||||
public NameableExecutor(String name, int size, int maximumPoolSize, long keepAliveTime,
|
||||
BlockingQueue<Runnable> queue, ThreadFactory factory, Map<String, Map<Thread, Runnable>> runnableMap) {
|
||||
BlockingQueue<Runnable> queue, ThreadFactory factory, Map<String, Map<Thread, Runnable>> runnableMap, ThreadChecker threadChecker) {
|
||||
super(size, maximumPoolSize, keepAliveTime, TimeUnit.SECONDS, queue, factory);
|
||||
this.name = name;
|
||||
this.runnableMap = runnableMap;
|
||||
this.threadChecker = threadChecker;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void beforeExecute(Thread t, Runnable r) {
|
||||
super.beforeExecute(t, r);
|
||||
if (threadChecker != null) {
|
||||
threadChecker.startExec(t);
|
||||
}
|
||||
if (null != runnableMap) {
|
||||
Map<Thread, Runnable> map = Maps.newConcurrentMap();
|
||||
map.put(t, r);
|
||||
@@ -43,4 +50,20 @@ public class NameableExecutor extends ThreadPoolExecutor {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void afterExecute(Runnable r, Throwable t) {
|
||||
super.afterExecute(r, t);
|
||||
if (threadChecker != null) {
|
||||
threadChecker.endExec();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void terminated() {
|
||||
super.terminated();
|
||||
if (threadChecker != null) {
|
||||
threadChecker.terminated();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,6 +73,19 @@
|
||||
<DefaultRolloverStrategy max="10"/>
|
||||
</RollingFile>
|
||||
|
||||
<RollingFile name="ThreadChecker" fileName="logs/thread.log"
|
||||
filePattern="logs/$${date:yyyy-MM}/thread-%d{MM-dd}-%i.log.gz">
|
||||
<PatternLayout>
|
||||
<Pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} %5p [%t] (%l) - %m%n</Pattern>
|
||||
</PatternLayout>
|
||||
<Policies>
|
||||
<OnStartupTriggeringPolicy/>
|
||||
<SizeBasedTriggeringPolicy size="250 MB"/>
|
||||
<TimeBasedTriggeringPolicy/>
|
||||
</Policies>
|
||||
<DefaultRolloverStrategy max="10"/>
|
||||
</RollingFile>
|
||||
|
||||
</Appenders>
|
||||
<Loggers>
|
||||
<AsyncLogger name="ha_log" additivity="false" includeLocation="false">
|
||||
@@ -89,6 +102,11 @@
|
||||
<AppenderRef ref="DumpFileLog"/>
|
||||
<AppenderRef ref="RollingFile"/>
|
||||
</AsyncLogger>
|
||||
<AsyncLogger name="ThreadChecker" additivity="false" includeLocation="false">
|
||||
<AppenderRef ref="ThreadChecker"/>
|
||||
<AppenderRef ref="Console"/>
|
||||
<AppenderRef ref="RollingFile"/>
|
||||
</AsyncLogger>
|
||||
|
||||
|
||||
<asyncRoot level="debug" includeLocation="true">
|
||||
|
||||
Reference in New Issue
Block a user