gpt4 book ai didi

c - `clflush` 大内存区不刷新?

转载 作者:行者123 更新时间:2023-12-04 13:34:05 25 4
gpt4 key购买 nike

我想测量内存访问的时间差异:数据从缓存中读取时,与从主内存中读取时,各需要多长时间。
考虑这个程序:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <time.h>

#include <x86intrin.h>

// Step, in bytes, between consecutive clflush targets in clean_cache().
// Presumably the cache-line size of the target CPU -- TODO confirm.
#define CL_SIZE 64
#define REGION_SIZE 0x40000000 // Size in bytes requested for mapped_area: 0x40000000 = 1 GiB

/**
 * Fill arr_ptr[0..N-1] with the integers 0..N-1 in shuffled order.
 *
 * The C library generator is reseeded from the wall clock on every call.
 * The identity sequence is then shuffled by walking from the last slot down
 * to the first and exchanging each slot with a randomly chosen slot at or
 * below it.
 *
 * @param arr_ptr  Output buffer with room for at least N ints.
 * @param N        Number of elements to produce; nothing is written when N <= 0.
 */
void gen_perm(int *arr_ptr, int N)
{
    srand(time(NULL));

    /* Start from the identity sequence 0, 1, ..., N-1. */
    int fill = 0;
    while (fill < N) {
        arr_ptr[fill] = fill;
        ++fill;
    }

    /* Shuffle from the back; the final step (slot 0) swaps with itself. */
    for (int remaining = N; remaining > 0; --remaining) {
        int *upper = arr_ptr + (remaining - 1);
        int *picked = arr_ptr + rand() % remaining;
        int saved = *upper;
        *upper = *picked;
        *picked = saved;
    }
}

/**
 * Force a one-byte read of the given memory address.
 *
 * The byte at `address` is passed to an empty inline-assembly statement as a
 * register input, so the compiler must evaluate `*address` even though the
 * value is otherwise unused. The "memory" clobber keeps the compiler from
 * moving other memory accesses across this statement.
 *
 * Declared `static inline`: a plain `inline` definition in C99/C11 provides
 * no external definition, so any call the compiler does not inline (for
 * example at -O0) would fail to link. `__asm__ __volatile__` is used rather
 * than `asm volatile` so the function also builds in strict ISO modes such
 * as -std=c11.
 *
 * @param address  Address of the byte to read; must be dereferenceable.
 */
static inline void force_read(char *address)
{
    __asm__ __volatile__ ("" : : "r" (*address) : "memory");
}

/**
 * Return the current time-stamp counter value, for measuring elapsed cycles.
 *
 * Instruction sequence (x86-64 only):
 *   - mfence: earlier loads and stores complete before the counter is read;
 *   - rdtscp: reads the counter into EDX:EAX (and writes ECX, hence the
 *     "rcx" clobber);
 *   - lfence: later instructions do not start before the counter is read.
 *
 * The "memory" clobber stops the compiler from moving memory accesses across
 * the timestamp; without it the access being timed may be hoisted out of the
 * measured window.
 *
 * Declared `static inline` because a plain `inline` definition in C99/C11
 * emits no external definition and fails to link when the call is not
 * inlined. `__asm__ __volatile__` is used so the code builds with -std=c11.
 *
 * @return The 64-bit counter value (high half from EDX, low half from EAX).
 */
static inline uint64_t timing(void)
{
    uint32_t time_lo, time_hi;

    __asm__ __volatile__ (
        "mfence\n\t"
        "rdtscp\n\t"
        "lfence"
        : "=a" (time_lo), "=d" (time_hi)
        :
        : "rcx", "memory");

    return ((uint64_t)time_hi << 32) | time_lo;
}

/* Base address of the region under test: assigned in main(), read by clean_cache() and profile(). */
char* mapped_area;

void clean_cache()
/**Objective is to flush the mapped_area completely from the cache
* */
{
for (int i=0;i<512*100;i+=CL_SIZE) // ---> NOTE THE !!! 512*100 !!! I'm not even flushing the entire mapped_area
asm volatile ("clflush %[target]"::[target]"m"(mapped_area[i]));
//_mm_clflush(&mapped_area[i]); ---> You can use this intrinsic function too

}

/**
 * Time one read from each of `range` locations in mapped_area and print the
 * results.
 *
 * Location k is mapped_area[k * stride]. The locations are visited in a
 * random order (from gen_perm) to reduce the influence of the prefetcher,
 * after clean_cache() has been called. Each read is bracketed by two
 * timing() calls and the difference is stored under the location's own
 * index, so the printed table is ordered by location, not by visit order.
 *
 * @param stride  Distance in bytes between consecutive locations.
 * @param range   Number of locations to time.
 * @return 0 on success, -1 if range is negative or an allocation fails
 *         (nothing is printed in that case).
 */
int profile(int stride,int range)
{
    if (range < 0)
        return -1;

    size_t count = (size_t)range;
    uint64_t *result_array = malloc(count * sizeof *result_array);
    int *perm_array = malloc(count * sizeof *perm_array);

    /* malloc(0) may legitimately return NULL, so only treat NULL as failure
     * when storage was actually requested. */
    if (count > 0 && (result_array == NULL || perm_array == NULL)) {
        free(perm_array);
        free(result_array);
        return -1;
    }

    gen_perm(perm_array, range);
    clean_cache();

    for (int i = 0; i < range; i++) {
        int mixed_index = perm_array[i]; /* shuffled visit order */
        /* Widen before multiplying so a large index*stride cannot overflow int. */
        int64_t offset = (int64_t)mixed_index * stride;

        uint64_t start = timing();
        force_read(&mapped_area[offset]);
        uint64_t stop = timing();

        result_array[mixed_index] = stop - start;
    }

    printf("\nLineNo\tTime");
    for (int i = 0; i < range; i++) {
        /* uint64_t is not necessarily unsigned long; cast to match %llu. */
        printf("\n%d\t%llu", i, (unsigned long long)result_array[i]);
    }

    free(perm_array);
    free(result_array);
    return 0;
}

/**
 * Allocate the REGION_SIZE-byte, 4096-byte-aligned test region and profile
 * 100 locations spaced 512 bytes apart.
 *
 * NOTE(review): the region is never written before profile() runs, so the
 * timed reads touch pages the process has not yet stored to -- this may
 * affect the measured latencies.
 *
 * @return 0 on success, EXIT_FAILURE if the region cannot be allocated.
 */
int main(void)
{
    mapped_area = memalign(4096, REGION_SIZE);
    if (mapped_area == NULL) {
        perror("memalign");
        return EXIT_FAILURE;
    }

    (void)profile(512, 100);

    free(mapped_area);
    mapped_area = NULL;
    return 0;
}
我得到的输出是:
LineNo  Time
0 76
1 76
2 76
3 76
4 692
5 76
6 76
7 76
8 280
9 76
10 76
11 76
12 76

....

97 76
98 76
99 76
显然,如果这些值真的是从主内存中读取的,那么这个耗时太小,不可能是正确的(我认为应该在 200-300 个周期左右)。我哪里出错了?

最佳答案

代码看起来是正确的(据我所知)。我认为问题出在操作系统的介入:每当我尝试从 mapped_area 中读取内容时,操作系统都会巧妙地处理缺页(page fault)并直接返回 0。只要先向每个页面写入数据,强制操作系统为我们真正分配这些页面,就可以轻松解决这个问题。

/**
 * Allocate the REGION_SIZE-byte test region, write one byte to every page so
 * each page has been stored to before measuring, then profile 100 locations
 * spaced 512 bytes apart.
 *
 * @return 0 on success, EXIT_FAILURE if the region cannot be allocated.
 */
int main(void)
{
    /* Assumed page size; sysconf(_SC_PAGESIZE) would query it at run time. */
    enum { PAGE_BYTES = 4096 };

    mapped_area = memalign(PAGE_BYTES, REGION_SIZE);
    if (mapped_area == NULL) {
        perror("memalign");
        return EXIT_FAILURE;
    }

    /* size_t index: a byte offset into the region should not depend on the
     * region size fitting in an int. */
    for (size_t offset = 0; offset < REGION_SIZE; offset += PAGE_BYTES)
        mapped_area[offset] = 123;

    (void)profile(512, 100);

    free(mapped_area);
    mapped_area = NULL;
    return 0;
}
这给了我:
LineNo  Time
0 272
1 266
2 422
3 234
4 234
5 254
6 220
7 230
8 266
...

97 212
98 264
99 268
相当一致!
P.S.:我不明白为什么即使发生了缺页,读取时间也没有增加……有什么想法吗?

关于c - `clflush` 大内存区不刷新?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/63281687/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com