forked from google/cadvisor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsummary.go
185 lines (169 loc) · 5.65 KB
/
summary.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package summary maintains the summary of aggregated minute, hour, and day stats.
// For a container running for more than a day, the amount of tracked data can go up to
// 40 KB when cpu and memory are tracked. We'll start by enabling collection for the
// node, followed by docker, and then all containers as we understand the usage pattern
// better.
// TODO(rjnagal): Optimize the size if we start running it for every container.
package summary
import (
"fmt"
"sync"
"time"
"github.com/google/cadvisor/info/v1"
info "github.com/google/cadvisor/info/v2"
)
// secondSample holds the usage fields we track for generating percentiles.
type secondSample struct {
	// Timestamp is the time when the sample was recorded.
	Timestamp time.Time
	// Cpu is the cpu usage.
	Cpu uint64
	// Memory is the memory usage.
	Memory uint64
}
// availableResources flags which resources are tracked for a container.
// A false flag means the matching usage field is left at zero when samples
// are recorded.
type availableResources struct {
	Cpu, Memory bool
}
// StatsSummary accumulates second samples for a container and keeps the
// minute samples and derived stats computed from them.
type StatsSummary struct {
	// available records which resources are being tracked for this container.
	available availableResources
	// secondSamples is the list of second samples. It is trimmed down to its
	// latest entry whenever a new minute sample is generated.
	secondSamples []*secondSample
	// minuteSamples holds the minute percentiles. We track 24 * 60 maximum samples.
	minuteSamples *SamplesBuffer
	// derivedStats is the latest derived instant, minute, hour, and day stats.
	// The instant usage is refreshed on every added sample; the others are
	// refreshed whenever a minute sample is generated. Guarded by dataLock.
	derivedStats info.DerivedStats
	dataLock     sync.RWMutex
}
// AddSample records a new second sample built from stat.
//
// Cpu and memory usage are copied from stat only for the resources marked as
// available; the instant usage is refreshed on every call. Once the buffered
// second samples span more than 60 seconds, a minute sample is generated from
// them and the derived stats are recomputed. The returned error, if any, comes
// from recomputing the derived stats.
func (s *StatsSummary) AddSample(stat v1.ContainerStats) error {
	current := &secondSample{Timestamp: stat.Timestamp}
	if s.available.Cpu {
		current.Cpu = stat.Cpu.Usage.Total
	}
	if s.available.Memory {
		current.Memory = stat.Memory.WorkingSet
	}
	s.secondSamples = append(s.secondSamples, current)
	s.updateLatestUsage()

	// TODO(jnagal): Use 'available' to avoid unnecessary computation.
	last := len(s.secondSamples) - 1
	// With a single sample there is no interval to measure yet, so start from
	// a token duration that can never reach the one-minute threshold.
	window := time.Nanosecond
	if last > 0 {
		window = s.secondSamples[last].Timestamp.Sub(s.secondSamples[0].Timestamp)
	}
	if window <= 60*time.Second {
		return nil
	}

	// Make a minute sample. This works with dynamic housekeeping as long
	// as we keep max dynamic housekeeping period close to a minute.
	minute := GetMinutePercentiles(s.secondSamples)
	// Drop the second samples but keep the latest one for continuity.
	// Moving it to the front and re-slicing reuses the backing array
	// instead of re-allocating the slice.
	s.secondSamples[0] = s.secondSamples[last]
	s.secondSamples = s.secondSamples[:1]
	s.minuteSamples.Add(minute)
	return s.updateDerivedStats()
}
// updateLatestUsage stores the instant usage taken from the newest second
// sample into the derived stats, together with that sample's timestamp.
// It does nothing when no second samples have been recorded.
func (s *StatsSummary) updateLatestUsage() {
	count := len(s.secondSamples)
	if count == 0 {
		return
	}
	newest := s.secondSamples[count-1]
	instant := info.InstantUsage{Memory: newest.Memory}
	// The cpu value is computed from the two most recent samples, so it needs
	// at least two of them. If getCpuRate reports an error the cpu field is
	// simply left at its zero value.
	if count >= 2 {
		if rate, err := getCpuRate(*newest, *s.secondSamples[count-2]); err == nil {
			instant.Cpu = rate
		}
	}

	s.dataLock.Lock()
	defer s.dataLock.Unlock()
	s.derivedStats.LatestUsage = instant
	s.derivedStats.Timestamp = newest.Timestamp
}
// updateDerivedStats rebuilds the derived stats from the current minute
// samples: the most recent minute sample becomes the minute usage, and hour
// and day usage are derived from the last 60 and 60*24 minute samples.
// The previously stored instant usage is carried over unchanged.
// It returns an error if the latest minute sample cannot be retrieved or if
// the hour or day usage cannot be computed; in that case the stored stats
// are left untouched.
func (s *StatsSummary) updateDerivedStats() error {
	fresh := info.DerivedStats{Timestamp: time.Now()}

	latestMinute := s.minuteSamples.RecentStats(1)
	if len(latestMinute) != 1 {
		return fmt.Errorf("failed to retrieve minute stats")
	}
	fresh.MinuteUsage = *latestMinute[0]

	hour, err := s.getDerivedUsage(60)
	if err != nil {
		return fmt.Errorf("failed to compute hour stats: %v", err)
	}
	day, err := s.getDerivedUsage(60 * 24)
	if err != nil {
		return fmt.Errorf("failed to compute day usage: %v", err)
	}
	fresh.HourUsage = hour
	fresh.DayUsage = day

	// Swap in the new stats under the lock, preserving the instant usage that
	// is maintained separately on every added sample.
	s.dataLock.Lock()
	defer s.dataLock.Unlock()
	fresh.LatestUsage = s.derivedStats.LatestUsage
	s.derivedStats = fresh
	return nil
}
// getDerivedUsage is a helper for the hour and day derived stats. It derives
// usage from up to n of the most recent minute samples and sets
// PercentComplete to the share of the n requested samples that were actually
// available. It returns an error when n is less than 1 or when no minute
// samples can be retrieved.
func (s *StatsSummary) getDerivedUsage(n int) (info.Usage, error) {
	if n <= 0 {
		return info.Usage{}, fmt.Errorf("invalid number of samples requested: %d", n)
	}
	recent := s.minuteSamples.RecentStats(n)
	if len(recent) == 0 {
		return info.Usage{}, fmt.Errorf("failed to retrieve any minute stats.")
	}
	// Derived stats are generated even when only partial data is available.
	result := GetDerivedPercentiles(recent)
	// Assumes the minute samples are equally spaced.
	result.PercentComplete = int32(len(recent) * 100 / n)
	return result, nil
}
// DerivedStats returns a copy of the latest calculated derived stats, taken
// while holding the read lock. The returned error is always nil.
func (s *StatsSummary) DerivedStats() (info.DerivedStats, error) {
	s.dataLock.RLock()
	snapshot := s.derivedStats
	s.dataLock.RUnlock()
	return snapshot, nil
}
// New creates a StatsSummary that tracks the resources spec reports having
// (cpu and/or memory). It returns an error when spec has neither.
func New(spec v1.ContainerSpec) (*StatsSummary, error) {
	var tracked availableResources
	if spec.HasCpu {
		tracked.Cpu = true
	}
	if spec.HasMemory {
		tracked.Memory = true
	}
	if !(tracked.Cpu || tracked.Memory) {
		return nil, fmt.Errorf("none of the resources are being tracked.")
	}
	// NOTE(review): the minute buffer is created with 60 (one hour), while the
	// minuteSamples field comment mentions 24 * 60 samples and day usage is
	// requested over 60 * 24 minute samples — confirm the intended size.
	return &StatsSummary{
		available:     tracked,
		minuteSamples: NewSamplesBuffer(60 /* one hour */),
	}, nil
}