gpt4 book ai didi

awk - sed 或 awk : group by paragraphs consisting of 2nd upto n+1th lines of each paragraph

转载 作者:行者123 更新时间:2023-12-04 10:31:54 25 4
gpt4 key购买 nike

我需要统计线程转储中相同子段落的数量。我无法用 sed 提取每个段落的第 2 行至第 n+1 行。使用 awk 也可以。

例如,给定以下示例 threaddump.txt

"RMI TCP Accept-0" Id=11 RUNNABLE (in native)
at java.net.PlainSocketImpl.socketAccept(Native Method)
at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
at java.net.ServerSocket.implAccept(ServerSocket.java:545)
at java.net.ServerSocket.accept(ServerSocket.java:513)
at sun.management.jmxremote.LocalRMIServerSocketFactory$1.accept(LocalRMIServerSocketFactory.java:52)
at sun.rmi.transport.tcp.TCPTransport$AcceptLoop.executeAcceptLoop(TCPTransport.java:400)
at sun.rmi.transport.tcp.TCPTransport$AcceptLoop.run(TCPTransport.java:372)
at java.lang.Thread.run(Thread.java:745)

"AMQP Connection 10.170.10.128:5672" Id=227 RUNNABLE (in native)
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
at java.io.BufferedInputStream.read(BufferedInputStream.java:265)
at java.io.DataInputStream.readUnsignedByte(DataInputStream.java:288)
at com.rabbitmq.client.impl.Frame.readFrom(Frame.java:95)
at com.rabbitmq.client.impl.SocketFrameHandler.readFrame(SocketFrameHandler.java:139)
at com.rabbitmq.client.impl.AMQConnection$MainLoop.run(AMQConnection.java:536)
at java.lang.Thread.run(Thread.java:745)

"http-bio-10.104.42.237-16210-exec-12" Id=90 RUNNABLE (in native)
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at org.apache.coyote.http11.InternalInputBuffer.fill(InternalInputBuffer.java:534)
at org.apache.coyote.http11.InternalInputBuffer.fill(InternalInputBuffer.java:519)
at org.apache.coyote.http11.Http11Processor.setRequestLineReadTimeout(Http11Processor.java:174)
at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:1048)
at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:637)
at org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:318)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
at java.lang.Thread.run(Thread.java:745)

"Signal Dispatcher" Id=6 RUNNABLE

"kafcli-poller-10" Id=277 RUNNABLE (in native)
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)
at org.apache.kafka.common.network.Selector.select(Selector.java:686)
at org.apache.kafka.common.network.Selector.poll(Selector.java:408)
at org.apache.kafka.clients.NetworkClient.poll(NetworkClient.java:460)
at org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.poll(ConsumerNetworkClient.java:261)
at org.apache.kafka.clients.consumer.internals.ConsumerNetworkClient.poll(ConsumerNetworkClient.java:233)
at org.apache.kafka.clients.consumer.KafkaConsumer.pollOnce(KafkaConsumer.java:1171)
at org.apache.kafka.clients.consumer.KafkaConsumer.poll(KafkaConsumer.java:1115)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)

"localhost-startStop-1-SendThread(zk0007.svc.prod.wd1.wd:2181)" Id=59 RUNNABLE (in native)
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)
at org.apache.zookeeper.ClientCnxnSocketNIO.doTransport(ClientCnxnSocketNIO.java:345)
at org.apache.zookeeper.ClientCnxn$SendThread.run(ClientCnxn.java:1214)

如果 n=3,输出将是(请注意每个子堆栈开头的计数):
2   at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)

2 at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)

1 at java.net.PlainSocketImpl.socketAccept(Native Method)
at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
at java.net.ServerSocket.implAccept(ServerSocket.java:545)

因为
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)

在线程转储中出现两次;等等等等。

这是一个三步过程:
  • 提取所有 RUNNABLE 段落,也就是 RUNNABLE 堆栈。这一步已使用以下命令成功完成:
  • cat threaddump.txt | sed -e '/./{H;$!d;}' -e 'x;/ RUNNABLE/!d;' > RUNNABLE.txt
  • 对于每个堆栈(或段落),提取第 2 行至第 n+1 行。我尝试了下面这条命令的许多不同组合,想用 sed 的“q”命令来选择行,但都无济于事。我就不一一列出基于这些示例的其他尝试了。awk 也可以,但我没能把 sed 中保持空间(hold space)的做法搬到 awk 上。
  • cat RUNNABLE.txt | sed -e '/./{H;$!d;}' -e 'x;/{2q}/!d;'
  • 最后,按分段分组。我还没有走那么远。但我的计划是通过删除换行符将每个子堆栈折叠为一行,然后使用 sort 后跟 uniq -c。
  • 最佳答案

    下列:

    # extract first fields from each group
    awk -v RS='' -v FS='\n' -v n=3 'NF > n { for (i = 2; i <= n + 1; ++i) print $i; printf "%c", "\0" }' |
    # sort and uniq
    sort -z | uniq -zc | sort -zrnk1 |
    # some messy output formatting
    sed 's/\x00//g; s/^ *\([0-9]\+\) */#\n\1#/; 1s/^#\n//; s/^ *at/#at/' | column -t -s'#' -o ' '

    输出:
    2   at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
    at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
    at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)

    2 at java.net.SocketInputStream.socketRead0(Native Method)
    at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
    at java.net.SocketInputStream.read(SocketInputStream.java:171)

    1 at java.net.PlainSocketImpl.socketAccept(Native Method)
    at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
    at java.net.ServerSocket.implAccept(ServerSocket.java:545)
  • 记录分隔符设置为空(RS=''),即以空行分隔记录。这样 awk 就会一次读入一个段落,因为段落之间由空行分隔。字段分隔符设置为换行符,因此在每个段落中,每一行都可以通过单独的 $num 变量轻松访问。然后我只输出每个段落的第 2 行至第 n+1 行,并在这几行之后附加一个零字节作为结尾。
  • sort -z | uniq -zc 随后统计每个相同分段出现的次数。
  • sort -zrnk1 然后按 uniq 输出的计数(第一个字段)对结果做降序数值排序。
  • 最后,那段比较杂乱的 sed 对输出做整理,再传给 column,以生成整齐的按列对齐的输出。
  • 关于awk - sed 或 awk : group by paragraphs consisting of 2nd upto n+1th lines of each paragraph,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/60383123/

    25 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com