java-为什么我的线程在没有同步/锁定的情况下被阻塞
我们有一个tomcat 9.0.62服务器,可以在1-3小时内可靠锁定。当我们使用jstack -F
我们看到359个线程处于阻塞状态,9个线程处于in\u NATIVE状态。大量可用内存、大量磁盘、低cpu使用率。我们只是在从OpenJDK 1.8.0u302升级到1.8.0u322(在Solaris上)之后才看到这一点(当我们降级到u302时,问题就消失了)。
根据java文档,阻塞状态意味着;正在等待监视器锁定。我不明白的是,一个没有同步的操作怎么会处于阻塞状态?下面是一个示例Pattern.compile
:
Thread t@1467: (state = BLOCKED)
- java.util.regex.Pattern.compile() @bci=144, line=1693 (Compiled frame)
- java.util.regex.Pattern.(java.lang.String, int) @bci=62, line=1352 (Compiled frame)
- java.util.regex.Pattern.compile(java.lang.String) @bci=6, line=1028 (Compiled frame)
- oracle.jdbc.driver.OracleStatement.(oracle.jdbc.driver.PhysicalConnection, int, int, int, int) @bci=377, line=6198 (Compiled frame)
- oracle.jdbc.driver.OraclePreparedStatement.(oracle.jdbc.driver.PhysicalConnection, java.lang.String, int, int, int, int) @bci=9, line=1363 (Compiled frame)
- oracle.jdbc.driver.T4CPreparedStatement.(oracle.jdbc.driver.PhysicalConnection, java.lang.String, int, int) @bci=14, line=43 (Compiled frame)
- oracle.jdbc.driver.T4CDriverExtension.allocatePreparedStatement(oracle.jdbc.driver.PhysicalConnection, java.lang.String, int, int) @bci=9, line=75 (Compiled frame)
- oracle.jdbc.driver.PhysicalConnection.prepareStatementInternal(java.lang.String, int, int) @bci=120, line=1648 (Compiled frame)
- oracle.jdbc.driver.PhysicalConnection.prepareStatement(java.lang.String, int, int) @bci=4, line=1606 (Compiled frame)
- oracle.jdbc.driver.PhysicalConnection.prepareStatement(java.lang.String) @bci=4, line=1533 (Compiled frame)
- com.zaxxer.hikari.pool.ProxyConnection.prepareStatement(java.lang.String) @bci=7, line=337 (Compiled frame)
- com.zaxxer.hikari.pool.HikariProxyConnection.prepareStatement(java.lang.String) @bci=2 (Compiled frame)
- org.hibernate.internal.NonContextualJdbcConnectionAccess.obtainConnection() @bci=4, line=37 (Compiled frame)
- org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.acquireConnectionIfNeeded() @bci=12, line=108 (Compiled frame)
- org.hibernate.resource.jdbc.internal.LogicalConnectionManagedImpl.getPhysicalConnection() @bci=5, line=138 (Compiled frame)
- org.hibernate.engine.jdbc.internal.StatementPreparerImpl.connection() @bci=4, line=50 (Compiled frame)
- org.hibernate.engine.jdbc.internal.StatementPreparerImpl$1.doPrepare() @bci=11, line=89 (Compiled frame)
- org.hibernate.engine.jdbc.internal.StatementPreparerImpl$StatementPreparationTemplate.prepareStatement() @bci=47, line=176 (Compiled frame)
- org.hibernate.engine.jdbc.internal.StatementPreparerImpl.prepareQueryStatement(java.lang.String, boolean, org.hibernate.ScrollMode) @bci=74, line=151 (Compiled frame)
- org.hibernate.loader.Loader.prepareQueryStatement(java.lang.String, org.hibernate.engine.spi.QueryParameters, org.hibernate.dialect.pagination.LimitHandler, boolean, org.hibernate.engine.spi.SharedSessionContractImplementor) @bci=39, line=2127 (Compiled frame)
- org.hibernate.loader.Loader.executeQueryStatement(java.lang.String, org.hibernate.engine.spi.QueryParameters, boolean, java.util.List, org.hibernate.engine.spi.SharedSessionContractImplementor) @bci=55, line=2059 (Compiled frame)
- org.hibernate.loader.Loader.executeQueryStatement(org.hibernate.engine.spi.QueryParameters, boolean, java.util.List, org.hibernate.engine.spi.SharedSessionContractImplementor) @bci=10, line=2037 (Compiled frame)
- org.hibernate.loader.Loader.doQuery(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters, boolean, org.hibernate.transform.ResultTransformer) @bci=44, line=956 (Compiled frame)
- org.hibernate.loader.Loader.doQueryAndInitializeNonLazyCollections(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters, boolean, org.hibernate.transform.ResultTransformer) @bci=62, line=357 (Compiled frame)
- org.hibernate.loader.Loader.doQueryAndInitializeNonLazyCollections(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters, boolean) @bci=5, line=327 (Compiled frame)
- org.hibernate.loader.Loader.loadEntity(org.hibernate.engine.spi.SharedSessionContractImplementor, java.lang.Object, org.hibernate.type.Type, java.lang.Object, java.lang.String, java.io.Serializable, org.hibernate.persister.entity.EntityPersister, org.hibernate.LockOptions, java.lang.Boolean) @bci=116, line=2440 (Compiled frame)
- org.hibernate.loader.entity.AbstractEntityLoader.load(org.hibernate.engine.spi.SharedSessionContractImplementor, java.lang.Object, java.lang.Object, java.io.Serializable, org.hibernate.LockOptions, java.lang.Boolean) @bci=22, line=77 (Compiled frame)
- org.hibernate.loader.entity.AbstractEntityLoader.load(java.lang.Object, org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.LockOptions) @bci=7, line=66 (Compiled frame)
- org.hibernate.persister.entity.AbstractEntityPersister.loadByUniqueKey(java.lang.String, java.lang.Object, org.hibernate.engine.spi.SharedSessionContractImplementor) @bci=11, line=2486 (Compiled frame)
- org.hibernate.type.EntityType.loadByUniqueKey(java.lang.String, java.lang.String, java.lang.Object, org.hibernate.engine.spi.SharedSessionContractImplementor) @bci=90, line=756 (Compiled frame)
- org.hibernate.type.EntityType.resolve(java.lang.Object, org.hibernate.engine.spi.SharedSessionContractImplementor, java.lang.Object, java.lang.Boolean) @bci=50, line=467 (Compiled frame)
- org.hibernate.engine.internal.TwoPhaseLoad$EntityResolver.lambda$static$0(org.hibernate.type.EntityType, java.lang.Object, org.hibernate.engine.spi.SharedSessionContractImplementor, java.lang.Object, java.lang.Boolean) @bci=6, line=576 (Compiled frame)
- org.hibernate.engine.internal.TwoPhaseLoad$EntityResolver$$Lambda$1426.resolve(org.hibernate.type.EntityType, java.lang.Object, org.hibernate.engine.spi.SharedSessionContractImplementor, java.lang.Object, java.lang.Boolean) @bci=7 (Compiled frame)
- org.hibernate.engine.internal.TwoPhaseLoad.initializeEntityEntryLoadedState(java.lang.Object, org.hibernate.engine.spi.EntityEntry, org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.internal.TwoPhaseLoad$EntityResolver) @bci=348, line=221 (Compiled frame)
- org.hibernate.engine.internal.TwoPhaseLoad.initializeEntity(java.lang.Object, boolean, org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.event.spi.PreLoadEvent, org.hibernate.engine.internal.TwoPhaseLoad$EntityResolver) @bci=48, line=156 (Compiled frame)
- org.hibernate.engine.internal.TwoPhaseLoad.initializeEntity(java.lang.Object, boolean, org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.event.spi.PreLoadEvent) @bci=7, line=126 (Compiled frame)
- org.hibernate.loader.Loader.initializeEntitiesAndCollections(java.util.List, java.lang.Object, org.hibernate.engine.spi.SharedSessionContractImplementor, boolean, java.util.List) @bci=171, line=1201 (Compiled frame)
- org.hibernate.loader.Loader.doQuery(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters, boolean, org.hibernate.transform.ResultTransformer) @bci=102, line=979 (Compiled frame)
- org.hibernate.loader.Loader.doQueryAndInitializeNonLazyCollections(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters, boolean, org.hibernate.transform.ResultTransformer) @bci=62, line=357 (Compiled frame)
- org.hibernate.loader.Loader.doList(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters, org.hibernate.transform.ResultTransformer) @bci=38, line=2868 (Compiled frame)
- org.hibernate.loader.Loader.doList(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters) @bci=4, line=2850 (Compiled frame)
- org.hibernate.loader.Loader.listIgnoreQueryCache(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters) @bci=4, line=2682 (Compiled frame)
- org.hibernate.loader.Loader.list(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters, java.util.Set, org.hibernate.type.Type[]) @bci=49, line=2677 (Compiled frame)
- org.hibernate.loader.hql.QueryLoader.list(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters) @bci=19, line=540 (Compiled frame)
- org.hibernate.hql.internal.ast.QueryTranslatorImpl.list(org.hibernate.engine.spi.SharedSessionContractImplementor, org.hibernate.engine.spi.QueryParameters) @bci=179, line=400 (Compiled frame)
- org.apache.log4j.Category.getEffectiveLevel() @bci=23, line=442 (Compiled frame)
- org.apache.log4j.Category.isEnabledFor(org.apache.log4j.Priority) @bci=20, line=751 (Compiled frame)
- org.jboss.logging.Log4jLogger.isEnabled(org.jboss.logging.Logger$Level) @bci=10, line=36 (Compiled frame)
- org.jboss.logging.Logger.isTraceEnabled() @bci=4, line=98 (Compiled frame)
- org.jboss.logging.DelegatingBasicLogger.isTraceEnabled() @bci=4, line=54 (Compiled frame)
- org.hibernate.engine.query.spi.HQLQueryPlan.performList(org.hibernate.engine.spi.QueryParameters, org.hibernate.engine.spi.SharedSessionContractImplementor) @bci=3, line=195 (Compiled frame)
- org.hibernate.internal.SessionImpl.list(java.lang.String, org.hibernate.engine.spi.QueryParameters) @bci=53, line=1454 (Compiled frame)
- org.hibernate.query.internal.AbstractProducedQuery.doList() @bci=85, line=1649 (Compiled frame)
- org.hibernate.query.internal.AbstractProducedQuery.list() @bci=5, line=1617 (Compiled frame)
- org.hibernate.query.Query.getResultList() @bci=1, line=165 (Compiled frame)
...
使现代化我已经检查了GC日志,在我的任何tomcat应用程序发出的最后一行日志后4秒,我看到了这个条目:
2022-05-19T00:57:45.782+0000: 2602.365: [GC pause (G1 Evacuation Pause) (young)
Desired survivor size 73400320 bytes, new threshold 15 (max 15)
- age 1: 9497240 bytes, 9497240 total
, 0.1257611 secs]
[Parallel Time: 111.6 ms, GC Workers: 15]
[GC Worker Start (ms): Min: 2602366.7, Avg: 2602366.8, Max: 2602366.9, Diff: 0.2]
[Ext Root Scanning (ms): Min: 1.4, Avg: 4.1, Max: 24.4, Diff: 22.9, Sum: 61.3]
[Update RS (ms): Min: 6.9, Avg: 26.4, Max: 29.4, Diff: 22.5, Sum: 396.2]
[Processed Buffers: Min: 4, Avg: 18.6, Max: 27, Diff: 23, Sum: 279]
[Scan RS (ms): Min: 19.5, Avg: 20.2, Max: 20.9, Diff: 1.4, Sum: 303.3]
[Code Root Scanning (ms): Min: 0.0, Avg: 0.0, Max: 0.0, Diff: 0.0, Sum: 0.1]
[Object Copy (ms): Min: 60.2, Avg: 60.4, Max: 60.7, Diff: 0.6, Sum: 906.7]
[Termination (ms): Min: 0.0, Avg: 0.1, Max: 0.1, Diff: 0.1, Sum: 1.2]
[Termination Attempts: Min: 1, Avg: 194.5, Max: 260, Diff: 259, Sum: 2917]
[GC Worker Other (ms): Min: 0.0, Avg: 0.1, Max: 0.2, Diff: 0.2, Sum: 2.1]
[GC Worker Total (ms): Min: 111.2, Avg: 111.4, Max: 111.5, Diff: 0.3, Sum: 1671.0]
[GC Worker End (ms): Min: 2602478.1, Avg: 2602478.2, Max: 2602478.3, Diff: 0.2]
[Code Root Fixup: 0.0 ms]
[Code Root Purge: 0.0 ms]
[String Dedup Fixup: 7.5 ms, GC Workers: 15]
[Queue Fixup (ms): Min: 0.0, Avg: 0.0, Max: 0.0, Diff: 0.0, Sum: 0.0]
[Table Fixup (ms): Min: 7.0, Avg: 7.2, Max: 7.4, Diff: 0.4, Sum: 108.1]
[Clear CT: 0.6 ms]
[Other: 6.0 ms]
[Choose CSet: 0.0 ms]
[Ref Proc: 0.5 ms]
[Ref Enq: 0.0 ms]
[Redirty Cards: 0.4 ms]
[Humongous Register: 0.4 ms]
[Humongous Reclaim: 0.1 ms]
[Free CSet: 2.9 ms]
[Eden: 1098.0M(1098.0M)->0.0B(774.0M) Survivors: 20480.0K->77824.0K Heap: 3829.9M(6144.0M)->2788.9M(6144.0M)]
[Times: user=1.78 sys=0.00, real=0.13 secs]
缺少的是这样一行,它紧跟在日志中的其他GC条目之后:
2022-05-19T00:57:42.081+0000: 2598.664: Total time for which application threads were stopped: 0.3016893 seconds, Stopping threads took: 0.0002452 seconds
这一行丢失的事实向我表明,GC没有设法恢复线程。鉴于此,我假设这是OpenJDK 1.8.0u322中的一个GC错误。
以下是tomcat进程中与内存和GC相关的设置:
-server
-Xms2400m
-Xmx6g
-XX:+UseStringDeduplication
-XX:CodeCacheMinimumFreeSpace=25m
-XX:ReservedCodeCacheSize=200m
-XX:-TieredCompilation
-XX:MaxMetaspaceSize=1000m
-XX:+UseG1GC
-XX:MaxGCPauseMillis=200
-Xloggc:gc.log.20220519_0014
-verbose:gc
-XX:+PrintGCDateStamps
-XX:+PrintGCDetails
-XX:+PrintGCCause
-XX:+PrintGCApplicationStoppedTime
-XX:+PrintGCApplicationConcurrentTime
-XX:+PrintTenuringDistribution
-XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=20
-XX:GCLogFileSize=50M
-XX:+HeapDumpOnOutOfMemoryError
-d64
这是每个线程堆栈跟踪的第一行:https ://gist.github.com/davidmoten/c3b19afbfce981fec1f0722ec231f354 。在该列表中,您可以看到许多没有同步的无害调用(对我来说指向 GC)。
这也是pstack调用 tomcat 进程的结果(这次是不同的实例,抱歉,但行为相同):https ://gist.github.com/davidmoten/699cf6e601fe018b8a2e1466d32ed6c7
在 pstack 输出中,我看到类似下面的线程跟踪,其中 Quartz 调度程序尝试创建一个新线程但正在等待 GC(方法 GC_locker::stall_until_clear)。tomcat 进程的 CPU 使用率为 0.03%,我在列表中看不到活动的 GC 线程,所以我假设 GC stop-the-world 线程已经完成,而没有恢复被阻塞的线程。我想我的下一步是报告 OpenJDK 的错误。
------------ lwp# 134 / thread# 134 lname: Scheduler_QuartzSchedulerThread ---------------
fffffffeffae24a8 lwp_cond_wait (105365a48, 105365a30, 0, 0)
fffffffefee8f508 __1cCosNPlatformEventEpark6M_v_ (105365a00, cfbb8, cf800, 0, fffffffeff562aa8, 105365a30) + 100
fffffffefee1ab54 __1cHMonitorFIWait6MpnGThread_l_i_ (1001371b0, 1052fb800, 0, 1001371b0, 71ab8, 0) + a4
fffffffefee1b9b8 __1cHMonitorEwait6Mblb_b_ (1001371b0, 1052fb800, 0, 1b80, 2, 104a9e790) + 2c0
fffffffefe926714 __1cJGC_lockerRstall_until_clear6F_v_ (1001371b0, 8e308, 8e000, 0, fffffffeff562aa8, fffffffeff5f0db0) + 18c
fffffffefe8de400 __1cPG1CollectedHeapXattempt_allocation_slow6MLCpI1_pnIHeapWord__ (10014c830, 105, 100168318, fffffffeff5d5050, ffffffff4acaee5c, fffffffeff562aa8) + 300
fffffffefe8dde00 __1cPG1CollectedHeapRallocate_new_tlab6ML_pnIHeapWord__ (10014c830, 105, 642200000, fffffffeff5d0780, 9a800, 828) + 74
fffffffefe7938e4 __1cNCollectedHeapXallocate_from_tlab_slow6FnLKlassHandle_pnGThread_L_pnIHeapWord__ (17, 105, 3, 10, fffffffeff562aa8, 1052fb858) + 17c
fffffffefe59327c __1cNCollectedHeapbAcommon_mem_allocate_noinit6FnLKlassHandle_LpnGThread__pnIHeapWord__ (7c2017bf0, 3, 1052fb800, 0, fffffffeff562aa8, 0) + 84
fffffffefe9bc8f8 __1cNInstanceKlassRallocate_instance6MpnGThread__pnPinstanceOopDesc__ (40000000, 1052fb800, fffffffefe9ba1f8, fffffffeff5e2220, cfc00, 0) + 40
fffffffefea0a69c __1cSInterpreterRuntimeE_new6FpnKJavaThread_pnMConstantPool_i_v_ (fffffffeff5e2220, ffffffff4d0819f0, 0, 1052fb800, fffffffeff5f83c0, fffffffeff562aa8) + 64c
ffffffff72822888 * org/quartz/simpl/RAMJobStore.acquireNextTriggers(JIJ)Ljava/util/List;+8
ffffffff72808998 * org/quartz/core/QuartzSchedulerThread.run()V+165
ffffffff728002a0 * StubRoutines (1)
fffffffefea26430 __1cJJavaCallsLcall_helper6FpnJJavaValue_pnMmethodHandle_pnRJavaCallArguments_pnGThread__v_ (ffffffff4acafab0, 1, ffffffff4acaf958, ffffffff728114c0, ffffffff4ba9df18, 1052fb800) + 7c0
fffffffefea249b0 __1cJJavaCallsMcall_virtual6FpnJJavaValue_nLKlassHandle_pnGSymbol_5pnRJavaCallArguments_pnGThread__v_ (ffffffff4acafab0, 1052fb800, 18, ffffffff4ba9df18, ffffffff4acaf958, 1052fb800) + 370
fffffffefea24d00 __1cJJavaCallsMcall_virtual6FpnJJavaValue_nGHandle_nLKlassHandle_pnGSymbol_6pnGThread__v_ (8, 10419d7a0, 2, 1, ffffffff4acaf9a1, ffffffff4acaf960) + 4c
fffffffefeb62790 __1cMthread_entry6FpnKJavaThread_pnGThread__v_ (10419d7a0, e, 97800, 10419d790, fffffffeff5fa518, 6c1a88988) + d0
fffffffeff0437f0 __1cKJavaThreadRthread_main_inner6M_v_ (1052fb800, fffffffefeb626c0, 90298, 0, fffffffeff562aa8, 902a0) + 90
fffffffefee80a50 java_start (fffffffefe7dbff0, 1052fb800, c3800, c3acc, fffffffeff562aa8, fffffffeff5fba30) + 380
fffffffeffadd43c _lwp_start (0, 0, 0, 0, 0, 0)