Parallel Benchmarks

Master parallel benchmarking techniques to accurately measure the performance characteristics of concurrent Go applications and identify scaling bottlenecks.

Understanding Parallel Benchmarks

Parallel benchmarks test how your code performs under concurrent load, revealing scaling characteristics, contention issues, and race conditions that sequential benchmarks might miss.

Basic Parallel Benchmark Structure

func BenchmarkParallelExample(b *testing.B) {
    b.RunParallel(func(pb *testing.PB) {
        for pb.Next() {
            // Code to benchmark in parallel
            result := expensiveOperation()
            _ = result
        }
    })
}

Comparing Sequential vs Parallel Performance

func BenchmarkMapOperations(b *testing.B) {
    // Test data
    data := make(map[string]int)
    for i := 0; i < 1000; i++ {
        data[fmt.Sprintf("key%d", i)] = i
    }

    // Sequential benchmark
    b.Run("Sequential", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            key := fmt.Sprintf("key%d", i%1000)
            _ = data[key]
        }
    })

    // Parallel benchmark
    b.Run("Parallel", func(b *testing.B) {
        b.RunParallel(func(pb *testing.PB) {
            i := 0
            for pb.Next() {
                key := fmt.Sprintf("key%d", i%1000)
                _ = data[key]
                i++
            }
        })
    })
}

Thread-Safe Data Structure Benchmarks

func BenchmarkConcurrentMap(b *testing.B) {
    // Regular map with mutex
    b.Run("MapWithMutex", func(b *testing.B) {
        var mu sync.RWMutex
        data := make(map[string]int)

        // Pre-populate
        for i := 0; i < 1000; i++ {
            data[fmt.Sprintf("key%d", i)] = i
        }

        b.ResetTimer()
        b.RunParallel(func(pb *testing.PB) {
            i := 0
            for pb.Next() {
                key := fmt.Sprintf("key%d", i%1000)

                mu.RLock()
                _ = data[key]
                mu.RUnlock()

                i++
            }
        })
    })

    // sync.Map
    b.Run("SyncMap", func(b *testing.B) {
        var data sync.Map

        // Pre-populate
        for i := 0; i < 1000; i++ {
            data.Store(fmt.Sprintf("key%d", i), i)
        }

        b.ResetTimer()
        b.RunParallel(func(pb *testing.PB) {
            i := 0
            for pb.Next() {
                key := fmt.Sprintf("key%d", i%1000)
                _, _ = data.Load(key)
                i++
            }
        })
    })
}

Channel Performance Benchmarks

func BenchmarkChannelOperations(b *testing.B) {
    b.Run("Unbuffered", func(b *testing.B) {
        ch := make(chan int)

        b.RunParallel(func(pb *testing.PB) {
            go func() {
                for pb.Next() {
                    ch <- 42
                }
            }()

            for pb.Next() {
                <-ch
            }
        })
    })

    b.Run("Buffered/Size100", func(b *testing.B) {
        ch := make(chan int, 100)

        b.RunParallel(func(pb *testing.PB) {
            go func() {
                for pb.Next() {
                    select {
                    case ch <- 42:
                    default:
                    }
                }
            }()

            for pb.Next() {
                select {
                case <-ch:
                default:
                }
            }
        })
    })
}

Worker Pool Benchmarks

type WorkerPool struct {
    tasks chan func()
    wg    sync.WaitGroup
}

func NewWorkerPool(workerCount int) *WorkerPool {
    wp := &WorkerPool{
        tasks: make(chan func(), 100),
    }

    for i := 0; i < workerCount; i++ {
        go wp.worker()
    }

    return wp
}

func (wp *WorkerPool) worker() {
    for task := range wp.tasks {
        task()
        wp.wg.Done()
    }
}

func (wp *WorkerPool) Submit(task func()) {
    wp.wg.Add(1)
    wp.tasks <- task
}

func (wp *WorkerPool) Wait() {
    wp.wg.Wait()
}

func BenchmarkWorkerPool(b *testing.B) {
    workerCounts := []int{1, 2, 4, 8, 16}

    for _, count := range workerCounts {
        b.Run(fmt.Sprintf("Workers%d", count), func(b *testing.B) {
            pool := NewWorkerPool(count)

            b.ResetTimer()
            b.RunParallel(func(pb *testing.PB) {
                for pb.Next() {
                    pool.Submit(func() {
                        // Simulate work
                        time.Sleep(time.Microsecond)
                    })
                }
            })

            pool.Wait()
        })
    }
}

Lock Contention Benchmarks

func BenchmarkLockContention(b *testing.B) {
    // Mutex contention
    b.Run("Mutex", func(b *testing.B) {
        var mu sync.Mutex
        var counter int64

        b.RunParallel(func(pb *testing.PB) {
            for pb.Next() {
                mu.Lock()
                counter++
                mu.Unlock()
            }
        })
    })

    // RWMutex for reads
    b.Run("RWMutex/Read", func(b *testing.B) {
        var mu sync.RWMutex
        var data int64 = 42

        b.RunParallel(func(pb *testing.PB) {
            for pb.Next() {
                mu.RLock()
                _ = data
                mu.RUnlock()
            }
        })
    })

    // Atomic operations
    b.Run("Atomic", func(b *testing.B) {
        var counter int64

        b.RunParallel(func(pb *testing.PB) {
            for pb.Next() {
                atomic.AddInt64(&counter, 1)
            }
        })
    })
}

Memory Pool Benchmarks

func BenchmarkMemoryPools(b *testing.B) {
    // Regular allocation
    b.Run("RegularAlloc", func(b *testing.B) {
        b.RunParallel(func(pb *testing.PB) {
            for pb.Next() {
                data := make([]byte, 1024)
                // Simulate usage
                for i := 0; i < len(data); i++ {
                    data[i] = byte(i % 256)
                }
                _ = data
            }
        })
    })

    // sync.Pool
    b.Run("SyncPool", func(b *testing.B) {
        pool := &sync.Pool{
            New: func() interface{} {
                return make([]byte, 1024)
            },
        }

        b.RunParallel(func(pb *testing.PB) {
            for pb.Next() {
                data := pool.Get().([]byte)
                // Simulate usage
                for i := 0; i < len(data); i++ {
                    data[i] = byte(i % 256)
                }
                pool.Put(data)
            }
        })
    })

    // Channel-based pool
    b.Run("ChannelPool", func(b *testing.B) {
        poolSize := runtime.NumCPU() * 2
        pool := make(chan []byte, poolSize)

        // Pre-populate pool
        for i := 0; i < poolSize; i++ {
            pool <- make([]byte, 1024)
        }

        b.RunParallel(func(pb *testing.PB) {
            for pb.Next() {
                var data []byte
                select {
                case data = <-pool:
                default:
                    data = make([]byte, 1024)
                }

                // Simulate usage
                for i := 0; i < len(data); i++ {
                    data[i] = byte(i % 256)
                }

                select {
                case pool <- data:
                default:
                    // Pool full, let GC handle it
                }
            }
        })
    })
}

HTTP Server Benchmarks

func BenchmarkHTTPServer(b *testing.B) {
    // Simple handler
    handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
        w.Write([]byte("Hello, World!"))
    })

    server := httptest.NewServer(handler)
    defer server.Close()

    client := &http.Client{
        Transport: &http.Transport{
            MaxIdleConnsPerHost: 100,
        },
    }

    b.ResetTimer()
    b.RunParallel(func(pb *testing.PB) {
        for pb.Next() {
            resp, err := client.Get(server.URL)
            if err != nil {
                b.Fatal(err)
            }
            resp.Body.Close()
        }
    })
}

Database Connection Pool Benchmarks

func BenchmarkDBConnectionPool(b *testing.B) {
    // Setup database with different pool sizes
    poolSizes := []int{1, 5, 10, 25, 50}

    for _, poolSize := range poolSizes {
        b.Run(fmt.Sprintf("PoolSize%d", poolSize), func(b *testing.B) {
            db := setupTestDB()
            db.SetMaxOpenConns(poolSize)
            db.SetMaxIdleConns(poolSize / 2)
            defer db.Close()

            b.ResetTimer()
            b.RunParallel(func(pb *testing.PB) {
                for pb.Next() {
                    var count int
                    err := db.QueryRow("SELECT COUNT(*) FROM users").Scan(&count)
                    if err != nil {
                        b.Fatal(err)
                    }
                }
            })
        })
    }
}

CPU-bound vs I/O-bound Workloads

func BenchmarkWorkloadTypes(b *testing.B) {
    // CPU-bound workload
    b.Run("CPUBound", func(b *testing.B) {
        b.RunParallel(func(pb *testing.PB) {
            for pb.Next() {
                // CPU-intensive calculation
                result := 0
                for i := 0; i < 10000; i++ {
                    result += i * i
                }
                _ = result
            }
        })
    })

    // I/O-bound workload
    b.Run("IOBound", func(b *testing.B) {
        b.RunParallel(func(pb *testing.PB) {
            for pb.Next() {
                // Simulate I/O delay
                time.Sleep(time.Microsecond * 100)
            }
        })
    })

    // Mixed workload
    b.Run("Mixed", func(b *testing.B) {
        b.RunParallel(func(pb *testing.PB) {
            for pb.Next() {
                // Some CPU work
                result := 0
                for i := 0; i < 1000; i++ {
                    result += i
                }

                // Some I/O wait
                time.Sleep(time.Microsecond * 10)

                _ = result
            }
        })
    })
}

GOMAXPROCS Impact Benchmarks

func BenchmarkGOMAXPROCS(b *testing.B) {
    originalGOMAXPROCS := runtime.GOMAXPROCS(0)
    defer runtime.GOMAXPROCS(originalGOMAXPROCS)

    maxProcs := []int{1, 2, 4, 8, runtime.NumCPU()}

    for _, procs := range maxProcs {
        runtime.GOMAXPROCS(procs)

        b.Run(fmt.Sprintf("GOMAXPROCS%d", procs), func(b *testing.B) {
            b.RunParallel(func(pb *testing.PB) {
                for pb.Next() {
                    // CPU-bound work
                    result := 0
                    for i := 0; i < 1000; i++ {
                        result += i * i
                    }
                    _ = result
                }
            })
        })
    }
}

Best Practices for Parallel Benchmarks

1. Avoid Shared State When Possible

// BAD: Shared counter causes contention
func BenchmarkSharedCounter(b *testing.B) {
    var counter int64

    b.RunParallel(func(pb *testing.PB) {
        for pb.Next() {
            atomic.AddInt64(&counter, 1)
            // This measures contention, not your algorithm
        }
    })
}

// GOOD: Per-goroutine state
func BenchmarkLocalCounter(b *testing.B) {
    b.RunParallel(func(pb *testing.PB) {
        localCounter := 0
        for pb.Next() {
            localCounter++
            // This measures your algorithm
        }
    })
}

2. Use Appropriate Buffer Sizes

func BenchmarkChannelBuffers(b *testing.B) {
    bufferSizes := []int{0, 1, 10, 100, 1000}

    for _, size := range bufferSizes {
        b.Run(fmt.Sprintf("Buffer%d", size), func(b *testing.B) {
            ch := make(chan int, size)

            b.RunParallel(func(pb *testing.PB) {
                go func() {
                    for pb.Next() {
                        select {
                        case ch <- 42:
                        default:
                        }
                    }
                }()

                for pb.Next() {
                    select {
                    case <-ch:
                    default:
                    }
                }
            })
        })
    }
}

3. Measure Real Scenarios

// Benchmark realistic parallel usage
func BenchmarkRealisticParallel(b *testing.B) {
    // Setup realistic shared resource
    cache := NewLRUCache(1000)

    // Pre-populate cache
    for i := 0; i < 500; i++ {
        cache.Put(fmt.Sprintf("key%d", i), i)
    }

    b.RunParallel(func(pb *testing.PB) {
        localRand := rand.New(rand.NewSource(time.Now().UnixNano()))

        for pb.Next() {
            if localRand.Float32() < 0.8 {
                // 80% reads
                key := fmt.Sprintf("key%d", localRand.Intn(1000))
                cache.Get(key)
            } else {
                // 20% writes
                key := fmt.Sprintf("key%d", localRand.Intn(1000))
                cache.Put(key, localRand.Intn(10000))
            }
        }
    })
}

Interpreting Parallel Benchmark Results

BenchmarkMapOperations/Sequential-8    20000000    75.0 ns/op
BenchmarkMapOperations/Parallel-8      50000000    25.0 ns/op

Key metrics to analyze:

Throughput improvement: 50M vs 20M operations
Per-operation latency: 25ns vs 75ns
Scalability: How performance changes with core count
Contention effects: Degradation under high concurrency

Parallel benchmarks reveal the true concurrency characteristics of your Go applications, helping you optimize for real-world concurrent usage patterns.

Parallel Benchmarks

Parallel Benchmarks

Understanding Parallel Benchmarks

Basic Parallel Benchmark Structure

Comparing Sequential vs Parallel Performance

Thread-Safe Data Structure Benchmarks

Channel Performance Benchmarks

Worker Pool Benchmarks

Lock Contention Benchmarks

Memory Pool Benchmarks

HTTP Server Benchmarks

Database Connection Pool Benchmarks

CPU-bound vs I/O-bound Workloads

GOMAXPROCS Impact Benchmarks

Best Practices for Parallel Benchmarks

1. Avoid Shared State When Possible

2. Use Appropriate Buffer Sizes

3. Measure Real Scenarios

Interpreting Parallel Benchmark Results

results matching ""

No results matching ""