There are many strategies to handle that situation. For instance, you can run several instances of your application and use autoscaling to match the provisioned resources to the users' needs at any time. However, when traffic spikes sharply, scaling out takes time, and in the meantime servers may become overloaded or fail. In this article, I will discuss common strategies to prevent such a situation. The first one is rate limiting: we cap the number of requests each client can make per interval using the token bucket algorithm, as in the following example:
main.go
package main

import (
    "fmt"
    "log"
    "net/http"
    "sync"
    "time"
)

type bucket struct {
    remainingTokens int
    lastRefillTime  time.Time
}

type RateLimiter struct {
    maxTokens      int
    refillInterval time.Duration
    buckets        map[string]*bucket
    mu             sync.Mutex
}

func NewRateLimiter(rate int, perInterval time.Duration) *RateLimiter {
    return &RateLimiter{
        maxTokens:      rate,
        refillInterval: perInterval,
        buckets:        make(map[string]*bucket),
    }
}

func (rl *RateLimiter) IsLimitReached(id string) bool {
    rl.mu.Lock()
    defer rl.mu.Unlock()
    b, ok := rl.buckets[id]
    // If the bucket doesn't exist, it is the first request for this client.
    // Create a new bucket and allow the request.
    if !ok {
        rl.buckets[id] = &bucket{
            remainingTokens: rl.maxTokens - 1,
            lastRefillTime:  time.Now(),
        }
        return false
    }
    // Calculate the number of tokens to add to the bucket since the last
    // refill.
    refillsSinceLast := int(time.Since(b.lastRefillTime) / rl.refillInterval)
    tokensAdded := rl.maxTokens * refillsSinceLast
    currentTokens := b.remainingTokens + tokensAdded
    // There are no tokens left to serve the request for this client.
    // Reject the request.
    if currentTokens < 1 {
        return true
    }
    if currentTokens > rl.maxTokens {
        // If the number of current tokens is greater than the maximum allowed,
        // reset the bucket and decrease the number of tokens by 1.
        b.lastRefillTime = time.Now()
        b.remainingTokens = rl.maxTokens - 1
    } else {
        // Otherwise, update the bucket and decrease the number of tokens by 1.
        deltaTokens := currentTokens - b.remainingTokens
        deltaRefills := deltaTokens / rl.maxTokens
        deltaTime := time.Duration(deltaRefills) * rl.refillInterval
        b.lastRefillTime = b.lastRefillTime.Add(deltaTime)
        b.remainingTokens = currentTokens - 1
    }
    // Allow the request.
    return false
}

type Handler struct {
    rl *RateLimiter
}

func NewHandler(rl *RateLimiter) *Handler {
    return &Handler{rl: rl}
}

func (h *Handler) Handler(w http.ResponseWriter, r *http.Request) {
    // Here should be the logic to get the client ID from the request
    // (it could be a user ID, an IP address, an API key, etc.).
    clientID := "some-client-id"
    if h.rl.IsLimitReached(clientID) {
        w.WriteHeader(http.StatusTooManyRequests)
        fmt.Fprint(w, http.StatusText(http.StatusTooManyRequests))
        return
    }
    w.WriteHeader(http.StatusOK)
    fmt.Fprint(w, http.StatusText(http.StatusOK))
}

func main() {
    // We allow 1000 requests per second per client to our service.
    rl := NewRateLimiter(1000, 1*time.Second)
    h := NewHandler(rl)
    http.HandleFunc("/", h.Handler)
    log.Fatal(http.ListenAndServe(":8080", nil))
}
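To see the token bucket in action outside the HTTP server, here is a minimal sketch of my own (the rateLimiterDemo function is hypothetical and simply reuses the RateLimiter defined above, along with the fmt and time imports already in the file) with a limit of 3 requests per second for a single client:

// rateLimiterDemo is a hypothetical helper that exercises the RateLimiter
// defined above; it is not part of the original example.
func rateLimiterDemo() {
    // Allow 3 requests per second per client.
    rl := NewRateLimiter(3, time.Second)
    // The first three calls consume the bucket's tokens; the next two are
    // rejected.
    for i := 0; i < 5; i++ {
        fmt.Println(rl.IsLimitReached("client-1")) // false, false, false, true, true
    }
    // After one refill interval has passed, the bucket receives maxTokens new
    // tokens, so requests are allowed again.
    time.Sleep(time.Second)
    fmt.Println(rl.IsLimitReached("client-1")) // false
}

Note that tokens are added in whole-interval steps, and the bucket is capped at maxTokens: an idle client can burst up to the full limit at the start of an interval, but it never accumulates more than one interval's worth of tokens.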
Let's run our example using Docker Compose with CPU and memory limits configured for the container. Also, we will use bombardier, a benchmarking tool, to check how our application behaves under different loads. We will use the following Dockerfile and docker-compose.yml:
Dockerfile
FROM golang:1.21 AS build-stage
WORKDIR /code
COPY main.go ./
RUN CGO_ENABLED=0 GOOS=linux go build -o /service main.go
FROM gcr.io/distroless/base AS build-release-stage
WORKDIR /
COPY --from=build-stage /service /service
EXPOSE 8080
ENTRYPOINT ["/service"]
docker-compose.yml
services:
  rate_limiting:
    build: .
    ports:
      - "8080:8080"
    deploy:
      resources:
        limits:
          cpus: '0.20'
          memory: 100M
        reservations:
          cpus: '0.10'
          memory: 50M
Now we can start the service and monitor its resource usage with docker stats:

$ docker compose -f ./cmd/rate_limiting/docker-compose.yml up --build --force-recreate -d
$ docker stats
First, let's send 10000 requests over a single connection:

$ bombardier -c 1 -n 10000 http://127.0.0.1:8080/
Bombarding http://127.0.0.1:8080/ with 10000 request(s) using 1 connection(s)
10000 / 10000 [===============================================================] 100.00% 859/s 11s
Done!
Statistics        Avg      Stdev        Max
  Reqs/sec       868.33     833.53    2684.45
  Latency        1.15ms     6.93ms    75.42ms
  HTTP codes:
    1xx - 0, 2xx - 10000, 3xx - 0, 4xx - 0, 5xx - 0
    others - 0
  Throughput:   151.93KB/s
With a single connection, the request rate stays below our limit of 1000 requests per second, so all 10000 responses are 2xx. Let's increase the load to 100 connections:

$ bombardier -c 100 -n 10000 http://127.0.0.1:8080/
Bombarding http://127.0.0.1:8080/ with 10000 request(s) using 100 connection(s)
10000 / 10000 [===============================================================] 100.00% 3320/s 3s
Done!
Statistics        Avg      Stdev        Max
  Reqs/sec      3395.87    6984.32   32322.59
  Latency       28.02ms    37.61ms   196.95ms
  HTTP codes:
    1xx - 0, 2xx - 3000, 3xx - 0, 4xx - 7000, 5xx - 0
    others - 0
  Throughput:   675.35KB/s
Since the handler uses a single hardcoded client ID, all connections share one bucket of 1000 tokens per second; at roughly 3300 requests per second over about 3 seconds, only about 3000 requests get tokens, and the remaining 7000 are rejected with the HTTP 429 status code. Let's stop the service:

$ docker compose -f ./cmd/rate_limiting/docker-compose.yml down --remove-orphans --timeout 1 --volumes
Another strategy is load shedding. Instead of limiting each client, the service monitors its own load and starts rejecting requests with the HTTP 503 status code when it detects that it is overloaded. Here is an example implementation:

main.go
package main

import (
    "context"
    "fmt"
    "log"
    "net/http"
    "sync/atomic"
    "time"
)

type LoadShedder struct {
    isOverloaded atomic.Bool
}

func NewLoadShedder(ctx context.Context, checkInterval, overloadFactor time.Duration) *LoadShedder {
    ls := LoadShedder{}
    go ls.runOverloadDetector(ctx, checkInterval, overloadFactor)
    return &ls
}

func (ls *LoadShedder) runOverloadDetector(ctx context.Context, checkInterval, overloadFactor time.Duration) {
    ticker := time.NewTicker(checkInterval)
    defer ticker.Stop()
    // Start with a fresh start time.
    startTime := time.Now()
    for {
        select {
        case <-ctx.Done():
            return
        case <-ticker.C:
            // Check how long it took to process the last batch of requests.
            elapsed := time.Since(startTime)
            if elapsed > overloadFactor {
                // If it took longer than the overload factor, we're overloaded.
                ls.isOverloaded.Store(true)
            } else {
                // Otherwise, we're not overloaded.
                ls.isOverloaded.Store(false)
            }
            // Reset the start time.
            startTime = time.Now()
        }
    }
}

func (ls *LoadShedder) IsOverloaded() bool {
    return ls.isOverloaded.Load()
}

type Handler struct {
    ls *LoadShedder
}

func NewHandler(ls *LoadShedder) *Handler {
    return &Handler{ls: ls}
}

func (h *Handler) Handler(w http.ResponseWriter, r *http.Request) {
    if h.ls.IsOverloaded() {
        w.WriteHeader(http.StatusServiceUnavailable)
        fmt.Fprint(w, http.StatusText(http.StatusServiceUnavailable))
        return
    }
    w.WriteHeader(http.StatusOK)
    fmt.Fprint(w, http.StatusText(http.StatusOK))
}

func main() {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()
    // The load shedder will check every 100ms if the last batch of requests
    // took longer than 200ms.
    ls := NewLoadShedder(ctx, 100*time.Millisecond, 200*time.Millisecond)
    h := NewHandler(ls)
    http.HandleFunc("/", h.Handler)
    log.Fatal(http.ListenAndServe(":8080", nil))
}
We define the LoadShedder struct to run the load detector that our request handler uses to decide whether to return the HTTP 503 status code or process the request. The LoadShedder struct holds an atomic flag indicating whether the system is overloaded; we use an atomic boolean to make access to it thread-safe, and the IsOverloaded method returns its current value. In the NewLoadShedder function, we create a new LoadShedder and start the overload detector in a goroutine, which checks at the specified interval whether the system is overloaded based on the overload factor. On each tick, it measures how much time has passed since the last check; when request handlers keep the CPU busy, the ticker fires late and that measured time grows. If it is higher than the overload factor, the system is overloaded. That means our request handlers have been using resources for too long, and we need more capacity to handle the requests.
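Timer lag is only one possible overload signal. Another common one, sketched below as my own illustration (the ConcurrencyLimiter type, its field names, and the idea of a fixed concurrency budget are assumptions, not part of the original example), is the number of in-flight requests; it reuses sync/atomic, which the example above already imports:

// ConcurrencyLimiter sheds load once too many requests are in flight.
// It is an illustrative alternative to the timer-based detector above.
type ConcurrencyLimiter struct {
    inFlight atomic.Int64 // number of requests currently being processed
    max      int64        // maximum allowed concurrent requests
}

// Acquire reserves a slot for an incoming request. It returns false when
// the limit is reached, in which case the caller should respond with the
// HTTP 503 status code.
func (cl *ConcurrencyLimiter) Acquire() bool {
    if cl.inFlight.Add(1) > cl.max {
        cl.inFlight.Add(-1)
        return false
    }
    return true
}

// Release frees the slot once the request has been handled.
func (cl *ConcurrencyLimiter) Release() {
    cl.inFlight.Add(-1)
}

In a handler, you would call Acquire first, return 503 when it fails, and defer Release otherwise. Compared to the timer-based detector, this sheds load based on a fixed concurrency budget rather than on observed scheduling delay.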
Let's run our example using Docker Compose and bombardier again, with the same resource limits for the container. We will use the following Dockerfile and docker-compose.yml:
Dockerfile
FROM golang:1.21 AS build-stage
WORKDIR /code
COPY main.go ./
RUN CGO_ENABLED=0 GOOS=linux go build -o /service main.go
FROM gcr.io/distroless/base AS build-release-stage
WORKDIR /
COPY --from=build-stage /service /service
EXPOSE 8080
ENTRYPOINT ["/service"]
docker-compose.yml
services:
  load_shedding:
    build: .
    ports:
      - "8080:8080"
    deploy:
      resources:
        limits:
          cpus: '0.20'
          memory: 100M
        reservations:
          cpus: '0.10'
          memory: 50M
As before, start the service and keep an eye on its resource usage:

$ docker compose -f ./cmd/load_shedding/docker-compose.yml up --build --force-recreate -d
$ docker stats
First, a moderate load of 10 concurrent connections:

$ bombardier -c 10 -n 10000 http://127.0.0.1:8080/
Bombarding http://127.0.0.1:8080/ with 10000 request(s) using 10 connection(s)
10000 / 10000 [===============================================================] 100.00% 1346/s 7s
Done!
Statistics        Avg      Stdev        Max
  Reqs/sec      1389.49    1582.00    6284.42
  Latency        7.24ms    22.41ms    98.43ms
  HTTP codes:
    1xx - 0, 2xx - 10000, 3xx - 0, 4xx - 0, 5xx - 0
    others - 0
  Throughput:   242.78KB/s
With 10 connections, the service keeps up and all responses are 2xx. Let's push it much harder with 1000 connections:

$ bombardier -c 1000 -n 10000 http://127.0.0.1:8080/
Bombarding http://127.0.0.1:8080/ with 10000 request(s) using 1000 connection(s)
10000 / 10000 [===============================================================] 100.00% 3791/s 2s
Done!
Statistics        Avg      Stdev        Max
  Reqs/sec      4242.28   11985.30   58592.64
  Latency      211.67ms   210.89ms      1.54s
  HTTP codes:
    1xx - 0, 2xx - 8823, 3xx - 0, 4xx - 0, 5xx - 1177
    others - 0
  Throughput:   696.96KB/s
Under this load, the detector reports that the service is overloaded, and 1177 of the 10000 requests are shed with the HTTP 503 status code instead of piling up and exhausting the container's resources. Let's stop the service:

$ docker compose -f ./cmd/load_shedding/docker-compose.yml down --remove-orphans --timeout 1 --volumes