1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
|
// SPDX-License-Identifier: GPL-2.0
/*
* fill_buf benchmark
*
* Copyright (C) 2018 Intel Corporation
*
* Authors:
* Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
* Fenghua Yu <fenghua.yu@intel.com>
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <string.h>
#include "resctrl.h"
/* Cache line size in bytes assumed by the flush and read loops in this file. */
#define CL_SIZE (64)
/* Alignment in bytes requested for the benchmark buffer allocation. */
#define PAGE_SIZE (4 * 1024)
/* Number of bytes in one mebibyte. */
#define MB (1024 * 1024)
/*
 * sb - store barrier
 *
 * On x86 (32- and 64-bit) this executes SFENCE. The "memory" clobber also
 * stops the compiler from moving memory accesses across the barrier.
 * On any other architecture the body is compiled out and this is a no-op.
 */
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
asm volatile("sfence\n\t"
: : : "memory");
#endif
}
/*
 * cl_flush - flush the cache line that contains @p
 * @p:	any address inside the cache line to flush
 *
 * On x86 (32- and 64-bit) this executes CLFLUSH on @p. The "memory" clobber
 * keeps the compiler from reordering memory accesses around the flush.
 * On any other architecture the body is compiled out and this is a no-op.
 */
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
asm volatile("clflush (%0)\n\t"
: : "r"(p) : "memory");
#endif
}
/*
 * mem_flush - flush a buffer out of the CPU caches
 * @buf:	start of the buffer
 * @buf_size:	size of the buffer in bytes
 *
 * Flushes every complete CL_SIZE-byte line of the buffer, one line at a
 * time, and then issues a store barrier. A trailing partial line (when
 * @buf_size is not a multiple of CL_SIZE) is not flushed.
 */
void mem_flush(unsigned char *buf, size_t buf_size)
{
	size_t lines_left = buf_size / CL_SIZE;	/* whole cache lines only */
	unsigned char *line = buf;

	for (; lines_left; lines_left--) {
		cl_flush(line);
		line += CL_SIZE;
	}

	sb();
}
/*
 * Buffer index step advance to work around HW prefetching interfering with
 * the measurements.
 *
 * Must be a prime to step through all indexes of the buffer. Strictly, every
 * index is visited only when the step shares no common factor with the
 * number of indexes, so a prime step works unless that number is a multiple
 * of it.
 *
 * Some primes work better than others on some architectures (from MBA/MBM
 * result stability point of view).
 */
#define FILL_IDX_MULT 23
/*
 * fill_one_span_read - perform one read pass over the buffer
 * @buf:	buffer to read
 * @buf_size:	size of the buffer in bytes
 *
 * The buffer is treated as an array of half-cacheline slots and one byte is
 * read per loop iteration, with as many iterations as there are slots.
 *
 * Return: the wrapping 8-bit sum of all bytes read. Callers should consume
 * it so that the reads cannot be optimized away.
 */
static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
	unsigned int nr_slots = buf_size / (CL_SIZE / 2);
	unsigned int slot = 0;
	unsigned int left;
	unsigned char acc = 0;

	/*
	 * Visit the slots in an order the HW prefetchers do not anticipate
	 * so that they do not disturb the caching pattern.
	 *
	 * The slot read in iteration i is:
	 *	i * FILL_IDX_MULT % nr_slots
	 * The wrap-around is open-coded as repeated subtraction to avoid
	 * a modulo inside the loop, which improves MBA/MBM result stability
	 * on some architectures.
	 */
	for (left = nr_slots; left; left--) {
		acc += buf[slot * (CL_SIZE / 2)];

		slot += FILL_IDX_MULT;
		while (slot >= nr_slots)
			slot -= nr_slots;
	}

	return acc;
}
/*
 * fill_cache_read - repeatedly read through the buffer
 * @buf:	buffer to read
 * @buf_size:	size of the buffer in bytes
 * @once:	true to do a single pass; false to keep reading forever
 *
 * With @once false this function never returns: the read pass is repeated
 * unconditionally.
 */
void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
	int sum;

	do {
		sum = fill_one_span_read(buf, buf_size);
	} while (!once);

	/* Store the result so the compiler cannot discard the memory reads. */
	*value_sink = sum;
}
/*
 * alloc_buffer - allocate and initialize the benchmark buffer
 * @buf_size:	size of the buffer in bytes
 * @memflush:	true to flush the buffer from the caches before returning
 *
 * Allocates a PAGE_SIZE-aligned buffer and writes one random 64-bit value
 * at the start of every CL_SIZE-byte line.
 *
 * Return: pointer to the buffer, or NULL if the allocation failed. The
 * memory comes from posix_memalign(), so the caller releases it with free().
 */
unsigned char *alloc_buffer(size_t buf_size, bool memflush)
{
	void *buf = NULL;
	uint64_t *p64;
	ssize_t s64;
	int ret;

	/*
	 * posix_memalign() returns 0 on success and a positive error number
	 * on failure (it never returns a negative value and does not set
	 * errno), so any non-zero result means there is no usable buffer.
	 */
	ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
	if (ret)
		return NULL;

	/* Initialize the buffer: touch one 64-bit word per cache line */
	p64 = buf;
	s64 = buf_size / sizeof(uint64_t);

	while (s64 > 0) {
		*p64 = (uint64_t)rand();
		p64 += (CL_SIZE / sizeof(uint64_t));
		s64 -= (CL_SIZE / sizeof(uint64_t));
	}

	/* Flush the memory before using to avoid "cache hot pages" effect */
	if (memflush)
		mem_flush(buf, buf_size);

	return buf;
}
/*
 * get_fill_buf_size - choose the benchmark buffer size for a cache
 * @cpu_no:	CPU number passed through to get_cache_size()
 * @cache_type:	cache type string passed through to get_cache_size()
 *
 * Return: twice the size reported by get_cache_size(), but never less than
 * MINIMUM_SPAN. If get_cache_size() returns non-zero, that value is
 * returned unchanged.
 */
ssize_t get_fill_buf_size(int cpu_no, const char *cache_type)
{
	unsigned long cache_total_size = 0;
	unsigned long doubled;
	int err = get_cache_size(cpu_no, cache_type, &cache_total_size);

	if (err)
		return err;

	doubled = cache_total_size * 2;
	if (doubled > MINIMUM_SPAN)
		return doubled;

	return MINIMUM_SPAN;
}
|