c - `clflush` 大内存区不刷新？-6ren

c - `clflush` 大内存区不刷新？

转载作者：行者123 更新时间：2023-12-04 13:34:05

25

4

我想测量从缓存读取与从主内存读取时，内存访问时间的差异。
考虑这个程序:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <time.h>

#include <x86intrin.h>

// Assumed cache-line size in bytes; used as the step between flushed addresses in clean_cache().
#define CL_SIZE 64
#define REGION_SIZE 0x40000000  // Size of the mapped region in bytes: 0x40000000 = 1 GiB

/**
 * Fill arr_ptr[0..N-1] with a random permutation of the integers 0..N-1.
 *
 * The C library generator is reseeded from the wall clock on every call,
 * then the identity sequence is shuffled in place, walking from the last
 * slot down to the first.
 */
void gen_perm(int* arr_ptr,int N)
{
  int pos;

  srand(time(NULL));

  /* Start from the identity sequence 0, 1, ..., N-1. */
  pos = 0;
  while (pos < N) {
    arr_ptr[pos] = pos;
    pos++;
  }

  /* Exchange every slot with a randomly picked slot at or below it. */
  for (pos = N; pos-- > 0; ) {
    int pick = rand() % (pos + 1);
    int held = arr_ptr[pos];

    arr_ptr[pos] = arr_ptr[pick];
    arr_ptr[pick] = held;
  }
}

/**
 * Force a one-byte memory read of the given address.
 *
 * The byte is loaded through a volatile-qualified pointer and handed to an
 * empty asm statement as a register input, so the load cannot be elided; the
 * "memory" clobber stops the compiler from moving other memory accesses
 * across this point.
 *
 * Declared `static inline`: in C99 and later a plain `inline` definition does
 * not provide an out-of-line copy, so any call the compiler chooses not to
 * inline (e.g. at -O0) would fail to link. `__asm__`/`__volatile__` are used
 * because the bare `asm` keyword is not available in strict ISO modes such as
 * -std=c11.
 *
 * @param address  Address to read; must point to at least one readable byte.
 */
static inline void force_read(const char *address)
{
  __asm__ __volatile__ ("" : : "r"(*(const volatile char *)address) : "memory");
}

/**
 * Return a time-stamp counter reading, useful for measuring clock cycles.
 *
 * MFENCE is issued before RDTSCP so that earlier loads and stores are ordered
 * ahead of the counter read. RDTSCP delivers the low half of the counter in
 * EAX and the high half in EDX, which are bound directly as outputs; it also
 * overwrites ECX, hence the rcx clobber. The "memory" clobber keeps the
 * compiler from moving memory accesses across the measurement point.
 *
 * Declared `static inline` because a plain `inline` definition in C99 and
 * later does not emit an out-of-line copy, so a non-inlined call would fail
 * to link. `__asm__`/`__volatile__` are used because the bare `asm` keyword
 * is unavailable in strict ISO modes such as -std=c11.
 *
 * x86-64 only.
 *
 * @return The 64-bit time-stamp counter value.
 */
static inline uint64_t timing(void)
{
  uint32_t time_lo, time_hi;
  __asm__ __volatile__(
    "mfence\n\t"
    "rdtscp"
    : "=a" (time_lo), "=d" (time_hi)
    :
    : "rcx", "memory");
  return ( time_lo | ((uint64_t)time_hi << 32) );
}

/* Base of the test buffer: allocated in main(), flushed by clean_cache(), read by profile(). */
char* mapped_area;

void clean_cache()
/**Objective is to flush the mapped_area completely from the cache
 * */
{
  for (int i=0;i<512*100;i+=CL_SIZE) // ---> NOTE THE !!! 512*100 !!! I'm not even flushing the entire mapped_area
    asm volatile ("clflush %[target]"::[target]"m"(mapped_area[i]));
    //_mm_clflush(&mapped_area[i]);  ---> You can use this intrinsic function too

}

/**
 * Time one read at each of `range` locations of mapped_area and print a table.
 *
 * Location k lives at byte offset k*stride. The locations are visited in a
 * random order produced by gen_perm() (to reduce prefetcher influence) after
 * clean_cache() has run, and each read is bracketed by two timing() calls.
 * Results are stored by location index, so the printed table is in address
 * order even though the reads were issued in shuffled order.
 *
 * @param stride  Distance in bytes between consecutive sampled locations.
 * @param range   Number of locations to sample.
 * @return 0 on success, -1 if range is not positive or an allocation fails.
 */
int profile(int stride,int range)
{
  if (range <= 0)
    return -1;

  uint64_t *result_array = malloc((size_t)range * sizeof *result_array);
  int *perm_array = malloc((size_t)range * sizeof *perm_array);
  if (result_array == NULL || perm_array == NULL) {
    /* free(NULL) is a no-op, so both can be released unconditionally. */
    free(perm_array);
    free(result_array);
    return -1;
  }

  gen_perm(perm_array,range);
  clean_cache();

  for (int i = 0; i < range; i++) {
    int mixed_index = perm_array[i]; /* shuffled order: trying to remove the prefetcher influence */
    /* Widen before multiplying so a large index*stride cannot overflow int. */
    int64_t offset = (int64_t)mixed_index * stride;

    uint64_t start = timing();
    force_read(&mapped_area[offset]);
    uint64_t stop = timing();

    result_array[mixed_index] = stop - start;
  }

  printf("\nLineNo\tTime");
  for (int i = 0; i < range; i++) {
    /* uint64_t is not guaranteed to be unsigned long; cast to match the format. */
    printf("\n%d\t%llu", i, (unsigned long long)result_array[i]);
  }

  free(perm_array);
  free(result_array);
  return 0;
}

/**
 * Allocate the 4096-byte-aligned test region, run one profiling pass over it
 * (stride 512, 100 locations), and release it.
 *
 * NOTE(review): the region is never written before profile() runs, so the
 * operating system may not yet have backed its pages with distinct physical
 * memory -- confirm whether the reads really reach main memory.
 *
 * @return 0 on success, EXIT_FAILURE if the region cannot be allocated.
 */
int main(void)
{
  mapped_area = memalign(4096,REGION_SIZE);
  if (mapped_area == NULL) {
    fprintf(stderr, "memalign: cannot allocate %zu bytes\n", (size_t)REGION_SIZE);
    return EXIT_FAILURE;
  }

  profile(512, 100);
  free(mapped_area);
  return 0;
}

我得到的输出是:

LineNo  Time
0   76
1   76
2   76
3   76
4   692
5   76
6   76
7   76
8   280
9   76
10  76
11  76
12  76

....

97  76
98  76
99  76

显然，如果我确实是从主内存中读取数据，那么这些数值太小了，不可能是正确的(我认为应该在 200-300 个周期左右)。我哪里做错了？

最佳答案

据我所知，代码本身是正确的。我认为问题出在操作系统的介入：每当我尝试从 mapped_area 中读取内容时，操作系统会巧妙地处理缺页异常(page fault)并直接返回 0。只要先向每个页面写入数据，强制操作系统为我们真正分配页面，就可以轻松解决这个问题。

/**
 * Allocate the page-aligned test region, write one byte into every page so
 * the operating system has to provide a real page for each, then run one
 * profiling pass (stride 512, 100 locations) and release the region.
 *
 * @return 0 on success, EXIT_FAILURE if the region cannot be allocated.
 */
int main(void)
{
  /* Assumed page size in bytes; sysconf(_SC_PAGESIZE) would be the portable source. */
  const size_t page_bytes = 4096;

  mapped_area = memalign(page_bytes,REGION_SIZE);
  if (mapped_area == NULL) {
    fprintf(stderr, "memalign: cannot allocate %zu bytes\n", (size_t)REGION_SIZE);
    return EXIT_FAILURE;
  }

  /* size_t index: an int counter would overflow if REGION_SIZE grew towards 2 GiB. */
  for (size_t offset = 0; offset < (size_t)REGION_SIZE; offset += page_bytes)
    mapped_area[offset] = 123;

  profile(512, 100);
  free(mapped_area);
  return 0;
}

这给了我:

LineNo  Time
0   272
1   266
2   422
3   234
4   234
5   254
6   220
7   230
8   266
...

97  212
98  264
99  268

相当一致!
P.S.：我不明白为什么在发生缺页异常的情况下，读取时间反而没有增加……有什么想法吗？

关于c - `clflush` 大内存区不刷新？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/63281687/

25

4

0

文章推荐： r - bit64 NA 不会在 data.frame 构造函数中复制

c++ - 指针追逐基准 : Read+Write(+CLFLUSH) faster than Read(+CLFLUSH)
我正在尝试了解使用 CLFLUSH 对性能的影响。为此，我写了一个小指针追逐基准测试。我拿 std::vector>其中第一个元素是下一个条目的偏移量，第二个元素是有效载荷。我从条目 0 转到下一个条
c - `clflush` 大内存区不刷新？
我想尝试测量内存访问的时间差异，从缓存访问时和从主内存访问时。考虑这个程序: #include #include #include #include #include #include
c - CLFLUSH() 的问题？
你能告诉我如何使用 clflush() 指令吗？我编写了以下简单代码来测量从缓存中读取变量的执行时间与从缓存中逐出变量后的执行时间之间的差异。但是我没有找到确凿的结果。使用 clflush() 清除缓
c++ - clflush 不刷新指令缓存
考虑以下代码段: #include #include #include #define ARRAYSIZE(arr) (sizeof(arr)/sizeof(arr[0])) inline vo
c - CLFLUSH 如何处理尚未在高速缓存中的地址？
我们正在尝试使用 Intel CLFLUSH 指令在用户空间刷新 Linux 中进程的缓存内容。我们创建了一个非常简单的 C 程序，它首先访问一个大数组，然后调用 CLFLUSH 来刷新整个数组的虚
clflush 通过 C 函数使缓存行无效
我正在尝试使用 clflush 手动逐出缓存行以确定缓存和行大小。我没有找到任何关于如何使用该指令的指南。我所看到的只是一些代码为此目的使用了更高级别的函数。有一个内核函数void clflush_
x86 - 为什么 x86 中存在 CLFLUSH？
我最近了解到 row hammer攻击。为了执行此攻击，程序员需要刷新 CPU 的完整缓存层次结构以获取特定数量的地址。我的问题是:为什么是 CLFLUSH x86 有必要吗？如果所有 L* 缓存都
performance - clflush 是否也会删除 TLB 条目？
是clflush 1 还刷新关联的 TLB 条目吗？我认为不会，因为 clflush 在缓存行粒度上运行，而 TLB 条目存在于(更大的)页面粒度 - 但我准备好感到惊讶。 1 ...或 clflus
opencl - clFlush(与 clFinish 相反)实际上有什么作用吗？
OpenCL clFinish() API 调用会阻塞，直到命令队列上的所有命令都已完成执行。相关函数， clFlush() ，据说 Issues all previously queued Open
c - 系统崩溃时 clflush 或 clflushopt 是原子的吗？
通常，缓存行是 64B，但非 volatile 内存的原子性是 8B。例如: x[1]=100; x[2]=100; clflush(x); x缓存行对齐，初始设置为 0 . 系统崩溃 clflus
c++ - clflush 在 i7 中没有给出 const 数据类型的正确答案
我已经编写了两个程序来检查 clflush 是否正在从缓存中逐出我的数据。在我编写的两个程序中，只有一个给出了正确的结果(按照我的预期，在 clflush 之后，访问时间必须比刷新之前更长)。这是我
x86 - clflush 实现 : Why's m8 in "Flushes cache line containing m8"?
英特尔文档中的 clflush 描述称“刷新包含 m8 的缓存行。”。此外，在 Intel 文档中，m8 表示“内存中的一个字节”。我很困惑为什么它只是m8，它只有一个字节。因为对于32位或64位系
x86 - clflush 实现 : Why's m8 in "Flushes cache line containing m8"?
英特尔文档中的 clflush 描述称“刷新包含 m8 的缓存行。”。此外，在 Intel 文档中，m8 表示“内存中的一个字节”。我很困惑为什么它只是m8，它只有一个字节。因为对于32位或64位系

首页

博学

6Ren·AI

商城

c - `clflush` 大内存区不刷新？