Memory Layout: Array of Structs vs Struct of Arrays
Question 1 / 51 • Correct so far: 0 (0 answered)
Aos Sum
float sumX(const std::vector<Particle>& particles) {
float total = 0.0f;
for (const auto& p : particles)
total += p.x;
return total;
}
float result = sumX(AOS);
Soa Sum
// Sums the x array of a struct-of-arrays particle set.
// Accumulates in single precision from first element to last; an empty
// x array yields 0.0f.
float sumX(const ParticlesSoA& particles) {
    const std::vector<float>& xs = particles.x;
    float acc = 0.0f;
    for (auto it = xs.begin(); it != xs.end(); ++it) {
        acc += *it;
    }
    return acc;
}
float result = sumX(SOA);
Shared test data (shared-setup)
// Number of particles in each test data set: 2^20 = 1,048,576.
constexpr int kCount{1 << 20};
// One particle in the array-of-structs layout.
// Only x, y and z carry data; `padding` exists purely to grow the struct to
// 16 floats, i.e. 64 bytes with a 4-byte float (the size of one cache line).
// NOTE: this fixes the element size only; nothing here aligns elements to a
// 64-byte boundary.
struct Particle {
    float x;
    float y;
    float z;
    float padding[13];  // 3 data floats + 13 padding floats = 16 floats
};
// The AoS-vs-SoA comparison assumes this exact size, so fail the build if the
// layout ever drifts instead of silently measuring something else.
static_assert(sizeof(Particle) == 64, "Particle must be exactly 64 bytes");
static std::vector<Particle> makeAoS() {
std::vector<Particle> v(kCount);
for (int i = 0; i < kCount; ++i) {
v[i] = {};
v[i].x = i;
v[i].y = i * 2.0f;
v[i].z = i * 3.0f;
}
return v;
}
// Struct-of-arrays particle set: one contiguous float vector per field.
// Index i across x, y and z refers to the same particle.
struct ParticlesSoA {
    std::vector<float> x;
    std::vector<float> y;
    std::vector<float> z;
};
// Builds the struct-of-arrays test data: three kCount-element arrays where
// index i holds x = i, y = 2i and z = 3i (as floats).
static ParticlesSoA makeSoA() {
    ParticlesSoA p{ std::vector<float>(kCount), std::vector<float>(kCount), std::vector<float>(kCount) };
    for (int i = 0; i < kCount; ++i) {
        // Convert once and explicitly rather than relying on implicit int->float conversion.
        const float base = static_cast<float>(i);
        p.x[i] = base;
        p.y[i] = base * 2.0f;
        p.z[i] = base * 3.0f;
    }
    return p;
}
// Shared read-only array-of-structs input, built once by makeAoS() and passed to the AoS sumX snippet.
static const std::vector<Particle> AOS = makeAoS();
static const ParticlesSoA SOA = makeSoA();
Which snippet is faster?
Snippet B (Soa Sum) is faster. The AoS loop pulls a full 64-byte struct through the cache for every element but uses only its 4-byte x field, so 60 of every 64 bytes fetched are wasted. SoA stores all x values contiguously, so each 64-byte cache line delivers 16 usable floats with no wasted bandwidth.
Benchmark results
| Snippet | CPU time / iteration | Speedup |
|---|---|---|
| Aos Sum | 1.17 ms | 1.0× |
| Soa Sum | 585 µs | 2.0× |
Explore the source
Open in Compiler Explorer
Quiz complete. You can return to the question list to restart and compare.