Optimization Strategies

Master systematic approaches to Go performance optimization, from identifying bottlenecks to implementing targeted improvements across algorithms, data structures, and system architecture.

Performance Optimization Methodology

The Optimization Process

// 1. Measure First - Establish Baseline
func BenchmarkBaseline(b *testing.B) {
    data := generateTestData(10000)

    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        result := originalAlgorithm(data)
        _ = result
    }
}

// 2. Profile to Identify Bottlenecks
func ProfileBottlenecks() {
    f, _ := os.Create("cpu.prof")
    defer f.Close()

    pprof.StartCPUProfile(f)
    defer pprof.StopCPUProfile()

    // Run your application
    runApplicationWorkload()
}

// 3. Optimize Targeted Areas
func BenchmarkOptimized(b *testing.B) {
    data := generateTestData(10000)

    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        result := optimizedAlgorithm(data)
        _ = result
    }
}

// 4. Validate Improvements
func BenchmarkComparison(b *testing.B) {
    data := generateTestData(10000)

    b.Run("Original", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            result := originalAlgorithm(data)
            _ = result
        }
    })

    b.Run("Optimized", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            result := optimizedAlgorithm(data)
            _ = result
        }
    })
}

Algorithm Optimization

Complexity Reduction

package main

import (
    "fmt"
    "sort"
    "time"
)

// O(n²) - Inefficient nested loops
func findDuplicatesNaive(data []int) []int {
    var duplicates []int

    for i := 0; i < len(data); i++ {
        for j := i + 1; j < len(data); j++ {
            if data[i] == data[j] {
                duplicates = append(duplicates, data[i])
                break
            }
        }
    }

    return duplicates
}

// O(n log n) - Sort-based approach
func findDuplicatesSorted(data []int) []int {
    if len(data) < 2 {
        return nil
    }

    sorted := make([]int, len(data))
    copy(sorted, data)
    sort.Ints(sorted)

    var duplicates []int
    prev := sorted[0]

    for i := 1; i < len(sorted); i++ {
        if sorted[i] == prev {
            duplicates = append(duplicates, sorted[i])
            // Skip additional duplicates
            for i+1 < len(sorted) && sorted[i+1] == sorted[i] {
                i++
            }
        }
        prev = sorted[i]
    }

    return duplicates
}

// O(n) - Hash map approach
func findDuplicatesHash(data []int) []int {
    seen := make(map[int]bool)
    duplicateSet := make(map[int]bool)

    for _, value := range data {
        if seen[value] {
            duplicateSet[value] = true
        } else {
            seen[value] = true
        }
    }

    var duplicates []int
    for value := range duplicateSet {
        duplicates = append(duplicates, value)
    }

    return duplicates
}

// Benchmark different approaches
func BenchmarkDuplicateDetection(b *testing.B) {
    sizes := []int{100, 1000, 10000}

    for _, size := range sizes {
        data := generateDataWithDuplicates(size)

        b.Run(fmt.Sprintf("Naive_Size_%d", size), func(b *testing.B) {
            for i := 0; i < b.N; i++ {
                result := findDuplicatesNaive(data)
                _ = result
            }
        })

        b.Run(fmt.Sprintf("Sorted_Size_%d", size), func(b *testing.B) {
            for i := 0; i < b.N; i++ {
                result := findDuplicatesSorted(data)
                _ = result
            }
        })

        b.Run(fmt.Sprintf("Hash_Size_%d", size), func(b *testing.B) {
            for i := 0; i < b.N; i++ {
                result := findDuplicatesHash(data)
                _ = result
            }
        })
    }
}

func generateDataWithDuplicates(size int) []int {
    data := make([]int, size)
    for i := range data {
        data[i] = i % (size / 10) // Create duplicates
    }
    return data
}

Caching and Memoization

package main

import (
    "sync"
    "time"
)

// Expensive computation without caching
func expensiveComputation(n int) int {
    time.Sleep(time.Millisecond) // Simulate expensive operation
    if n <= 1 {
        return n
    }
    return expensiveComputation(n-1) + expensiveComputation(n-2)
}

// Memoized version with cache
type MemoizedComputer struct {
    cache map[int]int
    mu    sync.RWMutex
}

func NewMemoizedComputer() *MemoizedComputer {
    return &MemoizedComputer{
        cache: make(map[int]int),
    }
}

func (mc *MemoizedComputer) Compute(n int) int {
    // Check cache first
    mc.mu.RLock()
    if result, exists := mc.cache[n]; exists {
        mc.mu.RUnlock()
        return result
    }
    mc.mu.RUnlock()

    // Compute and cache result
    mc.mu.Lock()
    defer mc.mu.Unlock()

    // Double-check pattern
    if result, exists := mc.cache[n]; exists {
        return result
    }

    var result int
    if n <= 1 {
        result = n
    } else {
        result = mc.Compute(n-1) + mc.Compute(n-2)
    }

    mc.cache[n] = result
    return result
}

// LRU Cache implementation for bounded memory usage
type LRUCache struct {
    capacity int
    cache    map[int]*Node
    head     *Node
    tail     *Node
}

type Node struct {
    key   int
    value int
    prev  *Node
    next  *Node
}

func NewLRUCache(capacity int) *LRUCache {
    head := &Node{}
    tail := &Node{}
    head.next = tail
    tail.prev = head

    return &LRUCache{
        capacity: capacity,
        cache:    make(map[int]*Node),
        head:     head,
        tail:     tail,
    }
}

func (lru *LRUCache) Get(key int) (int, bool) {
    if node, exists := lru.cache[key]; exists {
        lru.moveToHead(node)
        return node.value, true
    }
    return 0, false
}

func (lru *LRUCache) Put(key, value int) {
    if node, exists := lru.cache[key]; exists {
        node.value = value
        lru.moveToHead(node)
    } else {
        newNode := &Node{key: key, value: value}

        if len(lru.cache) >= lru.capacity {
            // Remove least recently used
            tail := lru.removeTail()
            delete(lru.cache, tail.key)
        }

        lru.cache[key] = newNode
        lru.addToHead(newNode)
    }
}

func (lru *LRUCache) moveToHead(node *Node) {
    lru.removeNode(node)
    lru.addToHead(node)
}

func (lru *LRUCache) removeNode(node *Node) {
    node.prev.next = node.next
    node.next.prev = node.prev
}

func (lru *LRUCache) addToHead(node *Node) {
    node.prev = lru.head
    node.next = lru.head.next
    lru.head.next.prev = node
    lru.head.next = node
}

func (lru *LRUCache) removeTail() *Node {
    last := lru.tail.prev
    lru.removeNode(last)
    return last
}

// Cached computation with LRU eviction
type CachedComputer struct {
    cache *LRUCache
}

func NewCachedComputer(cacheSize int) *CachedComputer {
    return &CachedComputer{
        cache: NewLRUCache(cacheSize),
    }
}

func (cc *CachedComputer) Compute(n int) int {
    if result, exists := cc.cache.Get(n); exists {
        return result
    }

    var result int
    if n <= 1 {
        result = n
    } else {
        result = cc.Compute(n-1) + cc.Compute(n-2)
    }

    cc.cache.Put(n, result)
    return result
}

Data Structure Optimization

Efficient Data Access Patterns

package main

import (
    "sort"
    "strings"
)

// Inefficient string operations
func buildStringNaive(words []string) string {
    var result string
    for _, word := range words {
        result += word + " "
    }
    return strings.TrimSpace(result)
}

// Optimized with strings.Builder
func buildStringOptimized(words []string) string {
    var builder strings.Builder

    // Pre-allocate capacity if known
    totalLen := 0
    for _, word := range words {
        totalLen += len(word) + 1 // +1 for space
    }
    builder.Grow(totalLen)

    for i, word := range words {
        if i > 0 {
            builder.WriteByte(' ')
        }
        builder.WriteString(word)
    }

    return builder.String()
}

// Slice optimization: pre-allocation vs growth
func processDataNaive(input []int) []int {
    var result []int

    for _, value := range input {
        if value%2 == 0 {
            result = append(result, value*2)
        }
    }

    return result
}

func processDataOptimized(input []int) []int {
    // Pre-allocate with estimated capacity
    result := make([]int, 0, len(input)/2)

    for _, value := range input {
        if value%2 == 0 {
            result = append(result, value*2)
        }
    }

    return result
}

// Map optimization: pre-sizing and key types
func buildMapNaive() map[string]int {
    m := make(map[string]int)

    for i := 0; i < 10000; i++ {
        key := fmt.Sprintf("key_%d", i)
        m[key] = i
    }

    return m
}

func buildMapOptimized() map[string]int {
    // Pre-size map to avoid rehashing
    m := make(map[string]int, 10000)

    for i := 0; i < 10000; i++ {
        key := fmt.Sprintf("key_%d", i)
        m[key] = i
    }

    return m
}

// Struct optimization: field ordering and packing
type UnoptimizedStruct struct {
    flag1  bool    // 1 byte + 7 bytes padding
    value1 int64   // 8 bytes
    flag2  bool    // 1 byte + 7 bytes padding
    value2 int64   // 8 bytes
    // Total: 32 bytes
}

type OptimizedStruct struct {
    value1 int64   // 8 bytes
    value2 int64   // 8 bytes
    flag1  bool    // 1 byte
    flag2  bool    // 1 byte + 6 bytes padding
    // Total: 24 bytes
}

// Interface vs concrete types
type Processor interface {
    Process(data []int) int
}

type ConcreteProcessor struct {
    multiplier int
}

func (cp *ConcreteProcessor) Process(data []int) int {
    sum := 0
    for _, value := range data {
        sum += value * cp.multiplier
    }
    return sum
}

func BenchmarkInterfaceVsConcrete(b *testing.B) {
    data := make([]int, 1000)
    for i := range data {
        data[i] = i
    }

    processor := &ConcreteProcessor{multiplier: 2}

    b.Run("Interface", func(b *testing.B) {
        var p Processor = processor
        for i := 0; i < b.N; i++ {
            result := p.Process(data)
            _ = result
        }
    })

    b.Run("Concrete", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            result := processor.Process(data)
            _ = result
        }
    })
}

Memory Pool Optimization

package main

import (
    "sync"
)

// Object pooling for expensive allocations
type Buffer struct {
    data []byte
}

func (b *Buffer) Reset() {
    b.data = b.data[:0]
}

type BufferPool struct {
    pool sync.Pool
}

func NewBufferPool(initialSize int) *BufferPool {
    return &BufferPool{
        pool: sync.Pool{
            New: func() interface{} {
                return &Buffer{
                    data: make([]byte, 0, initialSize),
                }
            },
        },
    }
}

func (bp *BufferPool) Get() *Buffer {
    return bp.pool.Get().(*Buffer)
}

func (bp *BufferPool) Put(b *Buffer) {
    b.Reset()
    bp.pool.Put(b)
}

// Usage example
func processDataWithPool(pool *BufferPool, input []byte) []byte {
    buffer := pool.Get()
    defer pool.Put(buffer)

    // Process data using the pooled buffer
    for _, b := range input {
        if b > 128 {
            buffer.data = append(buffer.data, b)
        }
    }

    // Return copy since buffer will be reused
    result := make([]byte, len(buffer.data))
    copy(result, buffer.data)
    return result
}

// Without pool (allocates new buffer each time)
func processDataWithoutPool(input []byte) []byte {
    var buffer []byte

    for _, b := range input {
        if b > 128 {
            buffer = append(buffer, b)
        }
    }

    return buffer
}

func BenchmarkBufferPool(b *testing.B) {
    input := make([]byte, 10000)
    for i := range input {
        input[i] = byte(i % 256)
    }

    pool := NewBufferPool(1024)

    b.Run("WithPool", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            result := processDataWithPool(pool, input)
            _ = result
        }
    })

    b.Run("WithoutPool", func(b *testing.B) {
        for i := 0; i < b.N; i++ {
            result := processDataWithoutPool(input)
            _ = result
        }
    })
}

Concurrency Optimization

Goroutine Pool Pattern

package main

import (
    "context"
    "runtime"
    "sync"
    "time"
)

type WorkerPool struct {
    workerCount int
    taskQueue   chan Task
    wg          sync.WaitGroup
    ctx         context.Context
    cancel      context.CancelFunc
}

type Task func() error

func NewWorkerPool(workerCount int, queueSize int) *WorkerPool {
    ctx, cancel := context.WithCancel(context.Background())

    return &WorkerPool{
        workerCount: workerCount,
        taskQueue:   make(chan Task, queueSize),
        ctx:         ctx,
        cancel:      cancel,
    }
}

func (wp *WorkerPool) Start() {
    for i := 0; i < wp.workerCount; i++ {
        wp.wg.Add(1)
        go wp.worker()
    }
}

func (wp *WorkerPool) worker() {
    defer wp.wg.Done()

    for {
        select {
        case task := <-wp.taskQueue:
            if err := task(); err != nil {
                // Handle error (log, retry, etc.)
                _ = err
            }
        case <-wp.ctx.Done():
            return
        }
    }
}

func (wp *WorkerPool) Submit(task Task) error {
    select {
    case wp.taskQueue <- task:
        return nil
    case <-wp.ctx.Done():
        return wp.ctx.Err()
    default:
        return fmt.Errorf("task queue full")
    }
}

func (wp *WorkerPool) Stop() {
    wp.cancel()
    close(wp.taskQueue)
    wp.wg.Wait()
}

// Adaptive worker pool that adjusts size based on load
type AdaptiveWorkerPool struct {
    minWorkers  int
    maxWorkers  int
    currentWorkers int
    taskQueue   chan Task
    workerWg    sync.WaitGroup
    ctx         context.Context
    cancel      context.CancelFunc
    mu          sync.RWMutex
}

func NewAdaptiveWorkerPool(minWorkers, maxWorkers, queueSize int) *AdaptiveWorkerPool {
    ctx, cancel := context.WithCancel(context.Background())

    awp := &AdaptiveWorkerPool{
        minWorkers:     minWorkers,
        maxWorkers:     maxWorkers,
        currentWorkers: minWorkers,
        taskQueue:      make(chan Task, queueSize),
        ctx:            ctx,
        cancel:         cancel,
    }

    return awp
}

func (awp *AdaptiveWorkerPool) Start() {
    // Start minimum workers
    for i := 0; i < awp.minWorkers; i++ {
        awp.workerWg.Add(1)
        go awp.worker()
    }

    // Start load monitor
    go awp.monitorLoad()
}

func (awp *AdaptiveWorkerPool) worker() {
    defer awp.workerWg.Done()

    idleTimer := time.NewTimer(30 * time.Second)
    defer idleTimer.Stop()

    for {
        select {
        case task := <-awp.taskQueue:
            idleTimer.Reset(30 * time.Second)
            if err := task(); err != nil {
                _ = err
            }
        case <-idleTimer.C:
            // Worker idle too long, consider shutdown
            awp.mu.Lock()
            if awp.currentWorkers > awp.minWorkers {
                awp.currentWorkers--
                awp.mu.Unlock()
                return
            }
            awp.mu.Unlock()
            idleTimer.Reset(30 * time.Second)
        case <-awp.ctx.Done():
            return
        }
    }
}

func (awp *AdaptiveWorkerPool) monitorLoad() {
    ticker := time.NewTicker(5 * time.Second)
    defer ticker.Stop()

    for {
        select {
        case <-ticker.C:
            queueLen := len(awp.taskQueue)
            queueCap := cap(awp.taskQueue)

            awp.mu.Lock()
            currentWorkers := awp.currentWorkers

            // Scale up if queue is getting full
            if queueLen > queueCap*3/4 && currentWorkers < awp.maxWorkers {
                awp.currentWorkers++
                awp.workerWg.Add(1)
                go awp.worker()
            }

            awp.mu.Unlock()
        case <-awp.ctx.Done():
            return
        }
    }
}

func (awp *AdaptiveWorkerPool) Submit(task Task) error {
    select {
    case awp.taskQueue <- task:
        return nil
    case <-awp.ctx.Done():
        return awp.ctx.Err()
    default:
        return fmt.Errorf("task queue full")
    }
}

func (awp *AdaptiveWorkerPool) Stop() {
    awp.cancel()
    close(awp.taskQueue)
    awp.workerWg.Wait()
}

// Benchmark different concurrency patterns
func BenchmarkConcurrencyPatterns(b *testing.B) {
    work := func() error {
        time.Sleep(time.Microsecond * 100)
        return nil
    }

    b.Run("Goroutine_Per_Task", func(b *testing.B) {
        var wg sync.WaitGroup
        for i := 0; i < b.N; i++ {
            wg.Add(1)
            go func() {
                defer wg.Done()
                work()
            }()
        }
        wg.Wait()
    })

    b.Run("Worker_Pool", func(b *testing.B) {
        pool := NewWorkerPool(runtime.NumCPU(), 1000)
        pool.Start()
        defer pool.Stop()

        var wg sync.WaitGroup
        for i := 0; i < b.N; i++ {
            wg.Add(1)
            pool.Submit(func() error {
                defer wg.Done()
                return work()
            })
        }
        wg.Wait()
    })

    b.Run("Adaptive_Pool", func(b *testing.B) {
        pool := NewAdaptiveWorkerPool(2, runtime.NumCPU()*2, 1000)
        pool.Start()
        defer pool.Stop()

        var wg sync.WaitGroup
        for i := 0; i < b.N; i++ {
            wg.Add(1)
            pool.Submit(func() error {
                defer wg.Done()
                return work()
            })
        }
        wg.Wait()
    })
}

System-Level Optimization

I/O Optimization

package main

import (
    "bufio"
    "io"
    "os"
)

// Inefficient byte-by-byte reading
func readFileNaive(filename string) ([]byte, error) {
    file, err := os.Open(filename)
    if err != nil {
        return nil, err
    }
    defer file.Close()

    var data []byte
    buf := make([]byte, 1)

    for {
        n, err := file.Read(buf)
        if err == io.EOF {
            break
        }
        if err != nil {
            return nil, err
        }
        if n > 0 {
            data = append(data, buf[0])
        }
    }

    return data, nil
}

// Optimized buffered reading
func readFileOptimized(filename string) ([]byte, error) {
    file, err := os.Open(filename)
    if err != nil {
        return nil, err
    }
    defer file.Close()

    // Get file size for pre-allocation
    stat, err := file.Stat()
    if err != nil {
        return nil, err
    }

    data := make([]byte, stat.Size())
    _, err = io.ReadFull(file, data)
    return data, err
}

// Streaming approach for large files
func processLargeFileStreaming(filename string, processor func([]byte) error) error {
    file, err := os.Open(filename)
    if err != nil {
        return err
    }
    defer file.Close()

    reader := bufio.NewReader(file)
    buffer := make([]byte, 64*1024) // 64KB buffer

    for {
        n, err := reader.Read(buffer)
        if err == io.EOF {
            break
        }
        if err != nil {
            return err
        }

        if err := processor(buffer[:n]); err != nil {
            return err
        }
    }

    return nil
}

// Memory-mapped file access for random access patterns
func readFileMapped(filename string) ([]byte, error) {
    file, err := os.Open(filename)
    if err != nil {
        return nil, err
    }
    defer file.Close()

    stat, err := file.Stat()
    if err != nil {
        return nil, err
    }

    // For large files, consider using mmap
    // This is a simplified version - real implementation would use syscalls
    data := make([]byte, stat.Size())
    _, err = io.ReadFull(file, data)
    return data, err
}

Systematic optimization strategies transform Go applications from functional to high-performance, addressing bottlenecks at every level from algorithms to system architecture while maintaining code clarity and maintainability.

results matching ""

No results matching ""