Skip to content

Commit

Permalink
pkg/archive: Backup extractor avoids max path limits
Browse files Browse the repository at this point in the history
Signed-off-by: Tiger Kaovilai <[email protected]>
  • Loading branch information
kaovilai committed Nov 24, 2024
1 parent 9f0026d commit 3dcd023
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 32 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/8449-kaovilai
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pkg/archive: Backup extractor avoids max path limits
4 changes: 2 additions & 2 deletions internal/delete/delete_item_action_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func InvokeDeleteActions(ctx *Context) error {
}

// get items out of backup tarball into a temp directory
dir, err := archive.NewExtractor(ctx.Log, ctx.Filesystem).UnzipAndExtractBackup(ctx.BackupReader)
dir, longNames, err := archive.NewExtractor(ctx.Log, ctx.Filesystem).UnzipAndExtractBackup(ctx.BackupReader)
if err != nil {
return errors.Wrapf(err, "error extracting backup")
}
Expand All @@ -71,7 +71,7 @@ func InvokeDeleteActions(ctx *Context) error {

ctx.Log.Debugf("Downloaded and extracted the backup file to: %s", dir)

backupResources, err := archive.NewParser(ctx.Log, ctx.Filesystem).Parse(dir)
backupResources, err := archive.NewParser(ctx.Log, ctx.Filesystem).Parse(dir, longNames)
if existErr := errors.Is(err, archive.ErrNotExist); existErr {
ctx.Log.Debug("ignore invoking delete item actions: ", err)
return nil
Expand Down
36 changes: 24 additions & 12 deletions pkg/archive/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package archive
import (
"archive/tar"
"compress/gzip"
"crypto/sha256"
"fmt"
"io"
"path/filepath"

Expand All @@ -42,11 +44,11 @@ func NewExtractor(log logrus.FieldLogger, fs filesystem.Interface) *Extractor {
}

// UnzipAndExtractBackup extracts a reader on a gzipped tarball to a local temp directory
func (e *Extractor) UnzipAndExtractBackup(src io.Reader) (string, error) {
func (e *Extractor) UnzipAndExtractBackup(src io.Reader) (string, map[string]string, error) {
gzr, err := gzip.NewReader(src)
if err != nil {
e.log.Infof("error creating gzip reader: %v", err)
return "", err
return "", nil, err
}
defer gzr.Close()

Expand All @@ -66,13 +68,18 @@ func (e *Extractor) writeFile(target string, tarRdr *tar.Reader) error {
return nil
}

func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) {
// 255 is common limit for file names.
// -5 to account for .json extension
const maxPathLength = 250

// returns tempfir containing backup contents, map[sha256]longNames, error
func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, map[string]string, error) {
dir, err := e.fs.TempDir("", "")
if err != nil {
e.log.Infof("error creating temp dir: %v", err)
return "", err
return "", nil, err
}

longNames := make(map[string]string)
for {
header, err := tarRdr.Next()

Expand All @@ -81,34 +88,39 @@ func (e *Extractor) readBackup(tarRdr *tar.Reader) (string, error) {
}
if err != nil {
e.log.Infof("error reading tar: %v", err)
return "", err
return "", nil, err
}

target := filepath.Join(dir, header.Name) //nolint:gosec // Internal usage. No need to check.

// If the target is longer than maxPathLength, we'll use the sha256 of the header name as the filename.
// https://github.com/vmware-tanzu/velero/issues/8434
if len(target) > maxPathLength {
shortSha256Name := fmt.Sprintf("%x", sha256.Sum256([]byte(header.Name))) // sha256 name is 64 characters
longNames[shortSha256Name] = header.Name
target = filepath.Join(dir, shortSha256Name)
}
switch header.Typeflag {
case tar.TypeDir:
err := e.fs.MkdirAll(target, header.FileInfo().Mode())
if err != nil {
e.log.Infof("mkdirall error: %v", err)
return "", err
return "", nil, err
}

case tar.TypeReg:
// make sure we have the directory created
err := e.fs.MkdirAll(filepath.Dir(target), header.FileInfo().Mode())
if err != nil {
e.log.Infof("mkdirall error: %v", err)
return "", err
return "", nil, err
}

// create the file
if err := e.writeFile(target, tarRdr); err != nil {
e.log.Infof("error copying: %v", err)
return "", err
return "", nil, err
}
}
}

return dir, nil
return dir, longNames, nil
}
54 changes: 53 additions & 1 deletion pkg/archive/extractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@ package archive

import (
"archive/tar"
"bytes"
"compress/gzip"
"io"
"os"
"path/filepath"
"strings"
"testing"

"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"

"github.com/vmware-tanzu/velero/pkg/test"
Expand Down Expand Up @@ -75,7 +79,7 @@ func TestUnzipAndExtractBackup(t *testing.T) {
file, err := ext.fs.OpenFile(fileName, os.O_RDWR|os.O_CREATE, 0644)
require.NoError(t, err)

_, err = ext.UnzipAndExtractBackup(file.(io.Reader))
_, _, err = ext.UnzipAndExtractBackup(file.(io.Reader))
if tc.wantErr && (err == nil) {
t.Errorf("%s: wanted error but got nil", tc.name)
}
Expand Down Expand Up @@ -149,3 +153,51 @@ func createRegular(fs filesystem.Interface) (string, error) {

return outName, nil
}

func TestReadBackupWithLongFilenames(t *testing.T) {
log := logrus.New()
fs := test.NewFakeFileSystem()
e := NewExtractor(log, fs)

// Create a tar reader with a file that has a very long name
var buf bytes.Buffer
tw := tar.NewWriter(&buf)

// Create a filename longer than maxPathLength
longFilename := strings.Repeat("a", maxPathLength+10) + ".txt"
content := []byte("test content")

hdr := &tar.Header{
Name: longFilename,
Mode: 0600,
Size: int64(len(content)),
Typeflag: tar.TypeReg,
}

if err := tw.WriteHeader(hdr); err != nil {
t.Fatal(err)
}
if _, err := tw.Write(content); err != nil {
t.Fatal(err)
}
tw.Close()

// Read the backup
dir, longNames, err := e.readBackup(tar.NewReader(&buf))

// Verify results
require.NoError(t, err)
require.NotEmpty(t, dir)
require.NotEmpty(t, longNames)

// Verify that a shortened SHA256 name was created and mapped correctly
found := false
for shortName, originalName := range longNames {
if originalName == longFilename {
found = true
// Verify the short name length is within limits
require.LessOrEqual(t, len(filepath.Join(dir, shortName)), maxPathLength)
}
}
require.True(t, found, "Long filename mapping not found")
}
34 changes: 22 additions & 12 deletions pkg/archive/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func NewParser(log logrus.FieldLogger, fs filesystem.Interface) *Parser {

// Parse reads an extracted backup on the file system and returns
// a structured catalog of the resources and items contained within it.
func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
func (p *Parser) Parse(dir string, longNames map[string]string) (map[string]*ResourceItems, error) {
// ensure top-level "resources" directory exists, and read subdirectories
// of it, where each one is expected to correspond to a resource.
resourcesDir := filepath.Join(dir, velerov1api.ResourcesDir)
Expand All @@ -81,9 +81,8 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
p.log.Warnf("Ignoring unexpected file %q in directory %q", resourceDir.Name(), strings.TrimPrefix(resourcesDir, dir+"/"))
continue
}

resourceItems := &ResourceItems{
GroupResource: resourceDir.Name(),
GroupResource: itemNameIfHasLongName(resourceDir.Name(), longNames),
ItemsByNamespace: map[string][]string{},
}

Expand All @@ -95,7 +94,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
return nil, errors.Wrapf(err, "error checking for existence of directory %q", strings.TrimPrefix(clusterScopedDir, dir+"/"))
}
if exists {
items, err := p.getResourceItemsForScope(clusterScopedDir, dir)
items, err := p.getResourceItemsForScope(clusterScopedDir, dir, longNames)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -124,7 +123,7 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
continue
}

items, err := p.getResourceItemsForScope(filepath.Join(namespaceScopedDir, namespaceDir.Name()), dir)
items, err := p.getResourceItemsForScope(filepath.Join(namespaceScopedDir, namespaceDir.Name()), dir, longNames)
if err != nil {
return nil, err
}
Expand All @@ -135,15 +134,15 @@ func (p *Parser) Parse(dir string) (map[string]*ResourceItems, error) {
}
}

resources[resourceDir.Name()] = resourceItems
resources[itemNameIfHasLongName(resourceDir.Name(), longNames)] = resourceItems
}

return resources, nil
}

// getResourceItemsForScope returns the list of items with a namespace or
// cluster-scoped subdirectory for a specific resource.
func (p *Parser) getResourceItemsForScope(dir, archiveRootDir string) ([]string, error) {
func (p *Parser) getResourceItemsForScope(dir, archiveRootDir string, longNames map[string]string) ([]string, error) {
files, err := p.fs.ReadDir(dir)
if err != nil {
return nil, errors.Wrapf(err, "error reading contents of directory %q", strings.TrimPrefix(dir, archiveRootDir+"/"))
Expand All @@ -156,12 +155,22 @@ func (p *Parser) getResourceItemsForScope(dir, archiveRootDir string) ([]string,
continue
}

items = append(items, strings.TrimSuffix(file.Name(), ".json"))
items = append(items, itemNameIfHasLongName(strings.TrimSuffix(file.Name(), ".json"), longNames))
}

return items, nil
}

func itemNameIfHasLongName(itemName string, longNames map[string]string) string {
if longNames == nil {
return itemName
}
if longName, ok := longNames[itemName]; ok {
return longName
}
return itemName
}

// checkAndReadDir is a wrapper around fs.DirExists and fs.ReadDir that does checks
// and returns errors if directory cannot be read.
func (p *Parser) checkAndReadDir(dir string) ([]os.FileInfo, error) {
Expand All @@ -183,7 +192,7 @@ func (p *Parser) checkAndReadDir(dir string) ([]os.FileInfo, error) {

// ParseGroupVersions extracts the versions for each API Group from the backup
// directory names and stores them in a metav1 APIGroup object.
func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, error) {
func (p *Parser) ParseGroupVersions(dir string, longNames map[string]string) (map[string]metav1.APIGroup, error) {
resourcesDir := filepath.Join(dir, velerov1api.ResourcesDir)

// Get the subdirectories inside the "resources" directory. The subdirectories
Expand All @@ -197,8 +206,9 @@ func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, err

// Loop through the resource.group directory names.
for _, rgd := range rgDirs {
rgdName := itemNameIfHasLongName(rgd.Name(), longNames)
group := metav1.APIGroup{
Name: extractGroupName(rgd.Name()),
Name: extractGroupName(rgdName),
}

rgdPath := filepath.Join(resourcesDir, rgd.Name())
Expand All @@ -213,7 +223,7 @@ func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, err
var supportedVersions []metav1.GroupVersionForDiscovery

for _, gvd := range gvDirs {
gvdName := gvd.Name()
gvdName := itemNameIfHasLongName(gvd.Name(), longNames)

// Don't save the namespaces or clusters directories in list of
// supported API Group Versions.
Expand Down Expand Up @@ -241,7 +251,7 @@ func (p *Parser) ParseGroupVersions(dir string) (map[string]metav1.APIGroup, err

group.Versions = supportedVersions

resourceAGs[rgd.Name()] = group
resourceAGs[rgdName] = group
}

return resourceAGs, nil
Expand Down
Loading

0 comments on commit 3dcd023

Please sign in to comment.