dataloader.py
#!/usr/bin/env python3
"""Dataset loaders that wrap cached pickles (SEWA, MOSEI, MOSI, IEMOCAP, Instagram, PANAM) in DataLoader objects."""
from copy import deepcopy
from pathlib import Path
from typing import Literal
import numpy as np
from python_tools import caching
from python_tools.ml.data_loader import DataLoader
from python_tools.ml.metrics import concat_dicts
from python_tools.ml.pytorch_tools import dict_to_batched_data
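
# Expected cache layout (inferred from SEWA.get_loader below; shapes illustrative):
#   (data, properties), where
#     data       -- sequence of dicts, each with item["x"][0] of shape (n_steps, n_features)
#     properties -- dict with at least "x_names", an array of the n_features feature names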


class SEWA:
    """Load a cached (data, properties) pickle and wrap it in a DataLoader."""

    def __init__(
        self,
        *,
        dimension: str = "valence",
        ifold: int = 0,
        name: Literal["training", "validation", "test"] = "training",
    ) -> None:
        self.dimension = dimension
        self.ifold = ifold
        self.name = name

    def get_cache_path(self) -> Path:
        return Path("cache") / f"SEWA_{self.dimension}_{self.name}_{self.ifold}.pickle"

    def get_loader(self) -> DataLoader:
        cache = self.get_cache_path()
        data_properties = caching.read_pickle(cache)
        data, properties = data_properties
        self.properties = properties
        # every sequence must have one feature column per entry in x_names
        assert all(x["x"][0].shape[1] == properties["x_names"].shape[0] for x in data)
        return DataLoader(data, properties=deepcopy(properties))


class MOSEI(SEWA):
    def get_cache_path(self) -> Path:
        return Path("cache") / f"MOSEI_{self.dimension}_{self.name}_{self.ifold}.pickle"

    def get_loader(self) -> DataLoader:
        loader = super().get_loader()
        # avoid many small clip-level batches: merge them into one dict and re-batch
        data = concat_dicts(
            [
                {key: value[0] for key, value in batch.items()}
                for batch in loader.iterator
            ]
        )
        # clamp begin times to be non-negative and ensure end >= begin + 0.1
        data["meta_begin"] = np.clip(data["meta_begin"], 0.0, None)
        data["meta_end"] = np.clip(data["meta_end"], data["meta_begin"] + 0.1, None)
        loader.iterator = dict_to_batched_data(data, batch_size=2048, shuffle=True)
        return loader


class MOSI(MOSEI):
    def get_cache_path(self) -> Path:
        return Path("cache") / f"MOSI_{self.dimension}_{self.name}_{self.ifold}.pickle"


class IEMOCAP(SEWA):
    def get_cache_path(self) -> Path:
        return (
            Path("cache") / f"IEMOCAP_{self.dimension}_{self.name}_{self.ifold}.pickle"
        )

    def get_loader(self) -> DataLoader:
        loader = super().get_loader()
        # avoid many small clip-level batches
        data = concat_dicts(
            [
                {key: value[0] for key, value in batch.items()}
                for batch in loader.iterator
            ]
        )
        loader.iterator = dict_to_batched_data(data, batch_size=2048, shuffle=True)
        return loader


class Test(SEWA):
    def get_cache_path(self) -> Path:
        return Path("cache") / f"Test_{self.dimension}_{self.name}_{self.ifold}.pickle"


class Instagram(Test):
    # label names for each annotation dimension
    semiotic = ["divergent", "parallel", "additive"]
    contextual = ["minimal", "close", "transcendent"]
    intent = [
        "provoke",
        "inform",
        "advocate",
        "entertain",
        "expose",
        "express",
        "promote",
    ]

    def get_cache_path(self) -> Path:
        return (
            Path("cache")
            / f"Instagram_{self.dimension}_{self.name}_{self.ifold}.pickle"
        )

    def get_loader(self) -> DataLoader:
        loader = super().get_loader()
        # class names for the selected dimension (semiotic, contextual, or intent)
        loader.properties["y_names"] = np.array(getattr(self, self.dimension))
        for batch in loader.iterator:
            batch["y"][0] = batch["y"][0].astype(int)
        return loader


class PANAM(SEWA):
    def get_cache_path(self) -> Path:
        return Path("cache") / f"PANAM_{self.dimension}_{self.name}_{self.ifold}.pickle"
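

# Minimal usage sketch (not part of the original module): assumes the cache pickles
# already exist under ./cache and that DataLoader exposes its batches via .iterator,
# as the loaders above rely on.
if __name__ == "__main__":
    loader = SEWA(dimension="valence", ifold=0, name="training").get_loader()
    for batch in loader.iterator:
        # batches are dicts of lists, e.g. batch["x"][0] holds a feature matrix
        print(sorted(batch.keys()))
        break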