haskell - 为什么我的程序使用这么多内存？-6ren

haskell - 为什么我的程序使用这么多内存？

转载作者：行者123 更新时间：2023-12-02 13:26:51

对于 25mb 的文件，内存使用量恒定为 792mb！我原以为这与我使用列表有关，但把代码的某些部分(例如应用 fft 的数组)改用 Vector 之后，内存使用量完全没有变化！

{-# LANGUAGE OverloadedStrings,BangPatterns #-}
import qualified Data.Attoparsec.Char8 as Ap
import Data.Attoparsec
import Control.Monad
import Control.Applicative
--import Control.DeepSeq (force)
import System.IO 
import System.Environment
import Data.List (zipWith4,unzip4,zip4,foldl')
import Data.Bits
import Data.Complex
import Data.String (fromString)
import Data.ByteString.Internal
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as Bl 
import qualified Data.Vector.Unboxed as Vu
import qualified Statistics.Transform  as St



{-
I run a test on a collection of data from a file
[(1,t),(2,t),(3,t),(4,t),(5,t)]
   -     -     - 
   |     -     -     -
   |     |     -     -     -
   |     |     |
 [y++t,  n,  y++t]
To do that, I use splitN to create a list of list
[[(1,t),(2,t),(3,t)],[(2,t),(3,t),(4,t)],[(3,t),(4,t),(5,t)]]
Map a serie of functions to determine a value for each inner collection,
and return when an event happened.



-}

-- | Three values of one type followed by a fourth value of another type.
-- The type of the trailing field is the first type parameter, so that the
-- 'Functor' instance ranges over the three leading fields.
data FourD b a
    = FourD a a a b

-- | 'fmap' rewrites the three leading fields and passes the fourth through.
instance Functor (FourD c) where
    fmap f (FourD p q r extra) = FourD (f p) (f q) (f r) extra

-- | Weight contributed by each set bit of a reading, lowest bit first.
-- (The name suggests milli-g per bit -- TODO confirm against the sensor.)
mgrav_per_bit :: [Int]
mgrav_per_bit = [ 18, 36, 71, 143, 286, 571, 1142 ]

-- | Convert a raw reading into a signed, bit-weighted value.
--
-- Readings above 128 are treated as negative: they are folded back as
-- @256 - a@, weighted, and the result is negated.
aToG :: Int -> Double
aToG a = fromIntegral signed
    where
        isNegative = a > 128
        foldedBits
            | isNegative = 256 - a
            | otherwise  = a
        -- Only seven weights exist, so the zip stops after bit 6 even though
        -- the index range runs up to 7.
        weighted = sum [ w | (w, ix) <- zip mgrav_per_bit [0..7], testBit foldedBits ix ]
        signed
            | isNegative = negate weighted
            | otherwise  = weighted


-- | Parse one record of the form @(int,int,int,time)@.
--
-- Each integer is converted with 'aToG' and wrapped as a complex number with
-- a zero imaginary part while parsing, so no separate mapping pass is needed
-- afterwards.  The time field is kept as the raw bytes up to the closing
-- parenthesis (byte 41 is the ASCII code of @')'@).
parseAcc :: Parser (FourD B.ByteString St.CD)
parseAcc = do
    x    <- Ap.char '(' *> axis
    y    <- Ap.char ',' *> axis
    z    <- Ap.char ',' *> axis
    time <- Ap.char ',' *> takeTill (== 41) <* Ap.char ')'
    return $! FourD x y z time
    where
    -- One axis reading: decimal integer -> weighted value -> complex number.
    axis = (:+0) . aToG <$> Ap.decimal
-- | Parse zero or more records, each of which must be followed by either the
-- end of the input or a line break.
-- Author's note: parsing fails at the end of the file unless the input ends
-- with a newline.
parseFile = many terminatedRecord
    where
    terminatedRecord = parseAcc <* (Ap.endOfInput <|> Ap.endOfLine)


-- | Run 'parseFile' over the complete input and return every parsed record.
--
-- 'parse' is incremental, so a 'Partial' result only means that the parser
-- would accept more bytes.  Because the whole file is handed over in one
-- piece, the continuation is resumed with an empty chunk to signal the end of
-- the input instead of crashing with 'undefined'.  A real parse failure stops
-- the program with the parser's own message and context.
readExpr input = settle (parse parseFile input)
    where
    settle (Done _ val)      = val
    settle (Fail _ ctxs msg) = parseFailure ctxs msg
    settle (Partial resume)  =
        case resume B.empty of
            Done _ val      -> val
            Fail _ ctxs msg -> parseFailure ctxs msg
            -- Not expected once end of input was signalled; refuse to loop.
            Partial _       -> error "readExpr: parser still expects input after end of input"
    parseFailure ctxs msg =
        error ("readExpr: parse failed: " ++ msg ++ " (context: " ++ show ctxs ++ ")")

-- | Unpack a 'FourD' into a plain 4-tuple, keeping the field order.
unType :: FourD b a -> (a, a, a, b)
unType (FourD x y z t) = (x, y, z, t)


-- | Split the records into their four columns, run @f@ over each of the three
-- leading columns and @g@ over the fourth, then combine the four resulting
-- lists element by element with 'filterAcc'.
amap  :: (Num c, Ord c) =>     ([a] -> [c]) -> ([d] -> [ByteString]) -> [FourD d a] -> [Bl.ByteString]
amap f g records =
    case unzip4 (map unType records) of
        (xs, ys, zs, times) -> zipWith4 filterAcc (f xs) (f ys) (f zs) (g times)

-- before  i had map filterAcc,outside amap. Tried to fuse everything to eliminate intermediaries

-- | Decide whether a window is an event and render the output line for it.
--
-- An event is detected when the first value exceeds 50; the second and third
-- values are accepted but not inspected.  For an event the result is the
-- time prefixed with @"yes: "@, otherwise the empty string.
--
-- The lazy result is assembled directly from the strict chunks with
-- 'Bl.fromChunks' rather than by unpacking to a 'String' and packing again.
filterAcc x _y _z t
    | x > 50    = Bl.fromChunks ["yes: ", t]
    | otherwise = ""
-- | Core of the application: cut the samples into overlapping windows of 32
-- with 'splitN', apply the FFT to each window, and reduce every transformed
-- window to a single integer via 'noiseEnergy' and 'floor'.
fftAcross :: [St.CD] -> [Int]
fftAcross samples = [ floor (noiseEnergy (St.fft window)) | window <- splitN 32 samples ]

-- | Sum of the magnitudes of every element except the first, divided by 32.
noiseEnergy  :: (RealFloat a, Vu.Unbox a) => Vu.Vector (Complex a) -> a
noiseEnergy x = total / 32
    where
    total = Vu.foldl' addMagnitude 0 (Vu.drop 1 x)
    addMagnitude acc c = acc + magnitude c

-- | Sliding windows of length @n@ (step 1) over a list, each as an unboxed
-- vector.  Windows are produced as long as at least @n@ elements remain.
--
-- Each window is built from @take n@ of the remaining list, so only the @n@
-- elements that end up in the vector are handed to 'Vu.fromList'.  The
-- previous form, @Vu.take n (Vu.fromList x)@, passed the entire remaining
-- list to 'Vu.fromList' for every window.
splitN :: Vu.Unbox a => Int -> [a] -> [Vu.Vector a]
splitN n = go
    where
    go xs
        | atLeast n xs = Vu.fromList (take n xs) : go (drop 1 xs)
        | otherwise    = []
-- Replacing the test by atLeast in place of a counter (that compared to length x,calculated once) reduced the behaviour that memory usage was constant.     

-- | List counterpart of 'splitN' for the time column (there is no unboxed
-- vector instance for ByteString): yields the first element of every window
-- of @n@ consecutive elements, so the result lines up with 'splitN'.
--
-- The head of the window is obtained by pattern matching instead of 'head',
-- so an exhausted list ends the result even when @n@ is 0 (the 'head'-based
-- version crashed on the empty list in that case).
splitN2 :: Int -> [a] -> [a]
splitN2 n = go
    where
    go xs@(x : rest)
        | atLeast n xs = x : go rest
    go _ = []

-- | @atLeast n xs@ is 'True' when @xs@ has at least @n@ elements.
-- It walks at most @n@ cells, so it never needs the length of the whole list.
atLeast :: Int -> [a] -> Bool
atLeast n xs
    | n == 0    = True
    | otherwise =
        case xs of
            []         -> False
            (_ : rest) -> atLeast (n - 1) rest



-- | Entry point: read the file named by the first command-line argument,
-- detect events, and write one line per window to @results.txt@.
--
-- The output file is managed by 'withFile', so the handle is flushed and
-- closed when writing finishes or an exception escapes; it was previously
-- never closed.  A missing argument is reported as a usage error instead of
-- crashing inside 'head'.
main :: IO ()
main = do
    args <- getArgs
    case args of
        [] -> ioError (userError "missing argument: expected the path of the input file")
        (filename : _) ->
            withFile "results.txt" WriteMode $ \filehandle -> do
                contents <- liftM readExpr $ B.readFile filename
                Bl.hPutStr filehandle . Bl.unlines . splitAndApplyAndFilter $ contents
    where
        splitAndApplyAndFilter  = amap fftAcross (splitN2 32)

编辑:经过一些重构、融合部分 map 并减少对 length 的使用之后，我设法让程序在处理 25mb 输入文件时只占用约 400mb 内存。不过，如果输入是 100mb，则需要 1.5gb。

该程序旨在确定某个时间点是否发生了某个事件：为此它需要取一组值(我目前使用 32 个)，对其运行 fft，把结果求和，并查看是否超过阈值。如果超过，则将时间打印到文件中。

http://db.tt/fT8kXPKz 25mb 测试文件

最佳答案

多亏了 Reddit 上关于同一问题的帖子(Parsing with Haskell and Attoparsec)，我找到了解决方案！

我的大部分问题是由于 attoparsec 很严格并且 haskell 数据相当大(因此 100mb 的文本文件在运行时实际上可能要大得多)引起的

另一半原因是性能分析(profiling)本身使内存使用量增加了一倍，而我之前没有考虑到这一点。

将解析器更改为惰性后，我的程序使用 120mb 代替 800mb(当输入大小为 116mb 时)，所以成功了!

如果有人对此感兴趣，以下是相关的代码更改:

-- Lazy variant: parse one record at a time and cons it onto the (lazily
-- evaluated) parse of the remaining input.  The list ends when the parser
-- asks for more input; a parse failure aborts with the parser's message.
readExpr input =
    case parse oneRecord input of
        Fail _ _ msg    -> error msg
        Partial _       -> []
        Done rest val   -> val : readExpr rest
    where
    -- A record is followed by a line break, optionally the last one of the input.
    oneRecord = parseAcc <* (Ap.endOfLine <* Ap.endOfInput <|> Ap.endOfLine)

完整代码:

{-# LANGUAGE OverloadedStrings,BangPatterns #-}
import qualified Data.Attoparsec.Char8 as Ap
import Data.Attoparsec
import Control.Monad
import Control.Applicative
--import Control.DeepSeq (force)
import System.IO 
import System.Environment
import Data.List (zipWith4,unzip4,zip4,foldl')
import Data.Bits
import Data.Complex
import Data.String (fromString)
import Data.ByteString.Internal
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as Bl 
import qualified Data.Vector.Unboxed as Vu
import qualified Statistics.Transform  as St


{-
I run a test on a collection of data from a file
[(1,t),(2,t),(3,t),(4,t),(5,t)]
   -     -     - 
   |     -     -     -
   |     |     -     -     -
   |     |     |
 [y++t,  n,  y++t]
To do that, I use splitN to create a list of list
[[(1,t),(2,t),(3,t)],[(2,t),(3,t),(4,t)],[(3,t),(4,t),(5,t)]]
Map a serie of functions to determine a value for each inner collection,
and return when an event happened.



-}

-- | Three values of one type followed by a fourth value of another type.
-- The type of the trailing field is the first type parameter, so that the
-- 'Functor' instance ranges over the three leading fields.
data FourD b a
    = FourD a a a b

-- | 'fmap' rewrites the three leading fields and passes the fourth through.
instance Functor (FourD c) where
    fmap f (FourD p q r extra) = FourD (f p) (f q) (f r) extra

-- | Weight contributed by each set bit of a reading, lowest bit first.
-- (The name suggests milli-g per bit -- TODO confirm against the sensor.)
mgrav_per_bit :: [Int]
mgrav_per_bit = [ 18, 36, 71, 143, 286, 571, 1142 ]

-- | Convert a raw reading into a signed, bit-weighted value.
--
-- Readings above 128 are treated as negative: they are folded back as
-- @256 - a@, weighted, and the result is negated.
aToG :: Int -> Double
aToG a = fromIntegral (if isNegative then negate weighted else weighted)
    where
        isNegative = a > 128
        foldedBits = if isNegative then 256 - a else a
        -- Only seven weights exist, so the zip stops after bit 6 even though
        -- the index range runs up to 7.
        weighted   = sum [ w | (w, ix) <- zip mgrav_per_bit [0..7], testBit foldedBits ix ]


-- | Parse one record of the form @(int,int,int,time)@.
--
-- Each integer is converted with 'aToG' and wrapped as a complex number with
-- a zero imaginary part while parsing, so no separate mapping pass is needed
-- afterwards.  The time field is kept as the raw bytes up to the closing
-- parenthesis (byte 41 is the ASCII code of @')'@).
parseAcc :: Parser (FourD B.ByteString St.CD)
parseAcc = do
    x    <- Ap.char '(' *> axis
    y    <- Ap.char ',' *> axis
    z    <- Ap.char ',' *> axis
    time <- Ap.char ',' *> takeTill (== 41) <* Ap.char ')'
    return $! FourD x y z time
    where
    -- One axis reading: decimal integer -> weighted value -> complex number.
    axis = (:+0) . aToG <$> Ap.decimal
-- | Parse zero or more records, each of which must be followed by either the
-- end of the input or a line break.
-- Author's note: parsing fails at the end of the file unless the input ends
-- with a newline.
parseFile = many terminatedRecord
    where
    terminatedRecord = parseAcc <* (Ap.endOfInput <|> Ap.endOfLine)


-- | Lazily parse the input one record at a time: every successful parse
-- yields one list element and the rest of the input is parsed on demand.
-- The list ends when the parser asks for more input; a parse failure aborts
-- with the parser's message.
readExpr input =
    case parse oneRecord input of
        Fail _ _ msg    -> error msg
        Partial _       -> []
        Done rest val   -> val : readExpr rest
    where
    -- A record is followed by a line break, optionally the last one of the input.
    oneRecord = parseAcc <* (Ap.endOfLine <* Ap.endOfInput <|> Ap.endOfLine)

-- | Unpack a 'FourD' into a plain 4-tuple, keeping the field order.
unType :: FourD b a -> (a, a, a, b)
unType (FourD x y z t) = (x, y, z, t)


-- | Split the records into their four columns, run @f@ over each of the three
-- leading columns and @g@ over the fourth, then combine the four resulting
-- lists element by element with 'filterAcc'.
amap  :: (Num c, Ord c) =>     ([a] -> [c]) -> ([d] -> [ByteString]) -> [FourD d a] -> [ByteString]
amap f g records =
    case unzip4 (map unType records) of
        (xs, ys, zs, times) -> zipWith4 filterAcc (f xs) (f ys) (f zs) (g times)

-- before  i had map filterAcc,outside amap. Tried to fuse everything to eliminate intermediaries

-- | An event is detected when the first value exceeds 50; the time is then
-- passed through unchanged, otherwise the empty string is returned.  The
-- second and third values are accepted but not inspected.
filterAcc x _y _z t = if x > 50 then t else ""

-- | Core of the application: cut the samples into overlapping windows of 32
-- with 'splitN', apply the FFT to each window, and reduce every transformed
-- window to a single integer via 'noiseEnergy' and 'floor'.
fftAcross :: [St.CD] -> [Int]
fftAcross samples = [ floor (noiseEnergy (St.fft window)) | window <- splitN 32 samples ]


-- | Sum of the magnitudes of every element except the first, divided by 32.
noiseEnergy  :: (RealFloat a, Vu.Unbox a) => Vu.Vector (Complex a) -> a
noiseEnergy x = total / 32
    where
    total = Vu.foldl' addMagnitude 0 (Vu.drop 1 x)
    addMagnitude acc c = acc + magnitude c


-- | Sliding windows of length @n@ (step 1) over a list, each as an unboxed
-- vector.  Windows are produced as long as at least @n@ elements remain.
--
-- Each window is built from @take n@ of the remaining list, so only the @n@
-- elements that end up in the vector are handed to 'Vu.fromList'.  The
-- previous form, @Vu.take n (Vu.fromList x)@, passed the entire remaining
-- list to 'Vu.fromList' for every window.
splitN :: Vu.Unbox a => Int -> [a] -> [Vu.Vector a]
splitN n = go
    where
    go xs
        | atLeast n xs = Vu.fromList (take n xs) : go (drop 1 xs)
        | otherwise    = []

-- Replacing the test by atLeast in place of a counter (that compared to length x,calculated once) reduced the behaviour that memory usage was constant.     

-- | List counterpart of 'splitN' for the time column (there is no unboxed
-- vector instance for ByteString): yields the first element of every window
-- of @n@ consecutive elements, so the result lines up with 'splitN'.
--
-- The head of the window is obtained by pattern matching instead of 'head',
-- so an exhausted list ends the result even when @n@ is 0 (the 'head'-based
-- version crashed on the empty list in that case).
splitN2 :: Int -> [a] -> [a]
splitN2 n = go
    where
    go xs@(x : rest)
        | atLeast n xs = x : go rest
    go _ = []

-- | @atLeast n xs@ is 'True' when @xs@ has at least @n@ elements.
-- It walks at most @n@ cells, so it never needs the length of the whole list.
atLeast :: Int -> [a] -> Bool
atLeast n xs
    | n == 0    = True
    | otherwise =
        case xs of
            []         -> False
            (_ : rest) -> atLeast (n - 1) rest

-- | Turn the per-window stream (event time, or @""@ for "no event") into
-- start/end markers.
--
-- A "Start Time" line is emitted for the first non-empty entry of a run, and
-- an "End   Time" line carrying the last non-empty entry of that run is
-- emitted when the run is interrupted by an empty entry.  A run that is still
-- open when the input ends is closed as well, so every start line is paired
-- with an end line (previously such a run produced a start line only).
intervalFinder :: [ByteString]->[B.ByteString]
intervalFinder = idle
    where
    -- No event in progress: skip empty entries until a time shows up.
    idle []         = []
    idle (t:ts)
        | t /= ""   = ("Start Time: " `B.append` t `B.append` "\n") : active t ts
        | otherwise = idle ts
    -- Event in progress; 'lastT' is the most recent event time seen.
    active lastT []     = [endLine lastT]
    active lastT (t:ts)
        | t == ""   = endLine lastT : idle ts
        | otherwise = active t ts
    endLine t = "End   Time: " `B.append` t `B.append` "\n\n"

-- | Entry point: read the file named by the first command-line argument,
-- detect events, and write the start/end markers to @results.txt@.
--
-- The output file is managed by 'withFile', so the handle is flushed and
-- closed even when an exception escapes while writing.  A missing argument
-- is reported as a usage error instead of crashing inside 'head'.
main :: IO ()
main = do
    args <- getArgs
    case args of
        [] -> ioError (userError "missing argument: expected the path of the input file")
        (filename : _) ->
            withFile "results.txt" WriteMode $ \filehandle -> do
                contents <- liftM readExpr $ B.readFile filename
                Bl.hPutStr filehandle . Bl.fromChunks . intervalFinder . splitAndApplyAndFilter $ contents
    where
         splitAndApplyAndFilter  = amap fftAcross (splitN2 32)  





    --contents <- liftM ((map ( readExpr )) . B.lines) $ B.readFile filename


   {-     *Main> let g = liftM ((amap fftAcross (splitN2 32)) . readExpr) $ B.readFile "te
stpattern2.txt"
-}

   -- B.hPutStrLn (filehandle)  . B.unlines . map (B.pack . show ) .  amap (map (floor .quare) .  (filter (/=[])) . map ( (drop 1) . (map (/32)) . fft ) . splitN 32) . map ( fmap(fromIntegral . aToG)) . map readExpr $ contents

关于haskell - 为什么我的程序使用这么多内存？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/12170439/

文章推荐： android - 在Android的AlertDialog中处理Edittext

文章推荐： java - 为什么使用注解@SuppressWarnings？

文章推荐： java - 多个按钮的onClick函数

文章推荐： java - 使用 protoc v3 生成 grpc Java stub 时出现问题

haskell - Haskell 和类 Haskell 语言之间的类型声明语法差异
在 Haskell 中，类型声明使用双冒号，即 (::)，如 not::Bool -> Bool。但是在许多语法与 Haskell 类似的语言中，例如榆树、 Agda 、他们使用单个冒号(:)来声明
haskell - 在模板 haskell 中运行模板 haskell
insertST :: StateDecoder -> SomeState -> Update SomeState SomeThing insertST stDecoder st = ... Stat
haskell - 在 Haskell ("second order Haskell"中生成 Haskell 类型的工具？
如果这个问题有点含糊，请提前道歉。这是一些周末白日梦的结果。借助 Haskell 出色的类型系统，将数学(尤其是代数)结构表达为类型类是非常令人愉快的。我的意思是，看看 numeric-prelud
haskell - 如何仅使用 Haskell 无休止地运行 Haskell 程序？
我有需要每 5 分钟执行一次的小程序。目前，我有执行该任务的 shell 脚本，但我想通过 CLI 中的键为用户提供无需其他脚本即可运行它的能力。实现这一目标的最佳方法是什么？最佳答案我想你会
haskell - 需要以真实世界 Haskell 风格解决哪些 Haskell 主题？
RWH 面世已经有一段时间了(将近 3 年)。在在线跟踪这本书的渐进式写作之后，我渴望获得我的副本(我认为这是写书的最佳方式之一。)在所有相当学术性的论文中，作为一个 haskell 学生，读起来多么
haskell - 用 Haskell 编写 Haskell 解释器
一个经典的编程练习是用 Lisp/Scheme 编写一个 Lisp/Scheme 解释器。可以利用完整语言的力量来为该语言的子集生成解释器。 Haskell 有类似的练习吗？我想使用 Haskell
haskell - Haskell 中的仿函数定义及其在 Learn You a Haskell 中的解释令人困惑
以下摘自' Learn You a Haskell ' 表示 f 在函数中用作“值的类型”。这是什么意思？即“值的类型”是什么意思？ Int 是“值的类型”，对吗？但是 Maybe 不是“值的类型”
haskell - haskell 中有包含字符串和列表的类型吗？
现在我正在尝试创建一个基本函数，用于删除句子中的所有空格或逗号。 stringToIntList :: [Char] -> [Char] stringToIntList inpt = [ a | a
haskell - 案例中的模式匹配，Haskell
我是 Haskell 的新手，对模式匹配有疑问。这是代码的高度简化版本: data Value = MyBool Bool | MyInt Integer codeDuplicate1 :: Valu
haskell - Haskell 中的这个仿函数是什么意思？
如何解释这个表达式？ :t (+) (+3) (*100) 自和具有相同的优先级并且是左结合的。我认为这与 ((+) (+3)) (*100) 相同.但是，我不知道它的作用。在 Learn
haskell - Haskell 如何计算表达式
这怎么行 > (* 30) 4 120 但这不是 > * 30 40 error: parse error on input ‘*’ 最佳答案 (* 30) 是一个 section，它仍然将 * 视为
haskell - 删除满足谓词的第一个元素(Haskell)
我想创建一个函数，删除满足第二个参数中给定谓词的第一个元素。像这样: removeFirst "abab" ( 'b') = "abab" removeFirst [1,2,3,4] even =
haskell - Haskell 中的内存
Context : def fib(n): if n aand returns a memoized version of the same function. The trick is t
haskell - 惰性评估和严格评估 Haskell
我明白惰性求值是什么，它是如何工作的以及它有什么优势，但是你能解释一下 Haskell 中什么是严格求值吗？我似乎找不到太多关于它的信息，因为惰性评估是最著名的。他们各自的优势是什么。什么时候真正使
haskell - Haskell 中的反向函数行为
digits :: Int -> [Int] digits n = reverse (x) where x | n digits 1234 = [3,1,2,4]
haskell - Haskell 是否支持类型类的匿名实例？
我在 F# 中有以下代码(来自一本书) open System.Collections.Generic type Table = abstract Item : 'T -> 'U with ge
haskell - 使用需要多个输入的过滤器 - Haskell
我对 Haskell 比较陌生，过去几周一直在尝试学习它，但一直停留在过滤器和谓词上，我希望能得到帮助以帮助理解。我遇到了一个问题，我有一个元组列表。每个元组包含一个 (songName, song
haskell - 或采用两个值参数 haskell
我是 haskell 的初学者，我试图为埃拉托色尼筛法定义一个简单的函数，但它说错误: • Couldn't match expected type ‘Bool -> Bool’
haskell - Haskell 中的读取函数
我是 Haskell 语言的新手，我在使用 read 函数时遇到了一些问题。准确地说，我的理解是: read "8.2" + 3.8 应该返回 12.0，因为我们希望返回与第二个成员相同的类型。我真正
haskell - Haskell 声明中的感叹号是什么意思？
当我尝试使用真实项目来驱动它来学习 Haskell 时，我遇到了以下定义。我不明白每个参数前面的感叹号是什么意思，我的书上好像也没有提到。 data MidiMessage = MidiMessage

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

haskell - 为什么我的程序使用这么多内存？