[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
Hongxin Liu
2023-09-19 14:20:26 +08:00
committed by GitHub
parent 3c6b831c26
commit 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions


@@ -1,15 +1,16 @@
-from typing import Dict
-import numpy as np
-from omegaconf import DictConfig, ListConfig
-import torch
-from torch.utils.data import Dataset
-from pathlib import Path
-import json
-from PIL import Image
-from torchvision import transforms
-from einops import rearrange
-from ldm.util import instantiate_from_config
-from datasets import load_dataset
+import json
+from pathlib import Path
+from typing import Dict
+
+import numpy as np
+import torch
+from datasets import load_dataset
+from einops import rearrange
+from ldm.util import instantiate_from_config
+from omegaconf import DictConfig, ListConfig
+from PIL import Image
+from torch.utils.data import Dataset
+from torchvision import transforms

 def make_multi_folder_data(paths, caption_files=None, **kwargs):
     """Make a concat dataset from multiple folders
@@ -19,10 +20,9 @@ def make_multi_folder_data(paths, caption_files=None, **kwargs):
     """
     list_of_paths = []
     if isinstance(paths, (Dict, DictConfig)):
-        assert caption_files is None, \
-            "Caption files not yet supported for repeats"
+        assert caption_files is None, "Caption files not yet supported for repeats"
         for folder_path, repeats in paths.items():
-            list_of_paths.extend([folder_path]*repeats)
+            list_of_paths.extend([folder_path] * repeats)
         paths = list_of_paths

     if caption_files is not None:
@@ -31,8 +31,10 @@ def make_multi_folder_data(paths, caption_files=None, **kwargs):
         datasets = [FolderData(p, **kwargs) for p in paths]
     return torch.utils.data.ConcatDataset(datasets)

+
 class FolderData(Dataset):
-    def __init__(self,
+    def __init__(
+        self,
         root_dir,
         caption_file=None,
         image_transforms=[],
@@ -40,7 +42,7 @@ class FolderData(Dataset):
         default_caption="",
         postprocess=None,
         return_paths=False,
-        ) -> None:
+    ) -> None:
         """Create a dataset from a folder of images.
         If you pass in a root directory it will be searched for images
         ending in ext (ext can be a list)
@@ -75,12 +77,12 @@ class FolderData(Dataset):
             self.paths.extend(list(self.root_dir.rglob(f"*.{e}")))
         if isinstance(image_transforms, ListConfig):
             image_transforms = [instantiate_from_config(tt) for tt in image_transforms]
-        image_transforms.extend([transforms.ToTensor(),
-                                 transforms.Lambda(lambda x: rearrange(x * 2. - 1., 'c h w -> h w c'))])
+        image_transforms.extend(
+            [transforms.ToTensor(), transforms.Lambda(lambda x: rearrange(x * 2.0 - 1.0, "c h w -> h w c"))]
+        )
         image_transforms = transforms.Compose(image_transforms)
         self.tform = image_transforms

     def __len__(self):
         if self.captions is not None:
             return len(self.captions.keys())
@@ -94,7 +96,7 @@ class FolderData(Dataset):
             caption = self.captions.get(chosen, None)
             if caption is None:
                 caption = self.default_caption
-            filename = self.root_dir/chosen
+            filename = self.root_dir / chosen
         else:
             filename = self.paths[index]
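
The hunks above only reformat make_multi_folder_data and FolderData; their behavior is unchanged. For orientation, a minimal usage sketch, assuming the file imports as ldm.data.simple and using made-up folder paths and a made-up caption file (none of these names come from this commit):

# Illustrative sketch only: module path, folders, and caption file are assumptions.
from ldm.data.simple import FolderData, make_multi_folder_data

# Single folder of images; captions are looked up per file, with a fallback default.
ds = FolderData(root_dir="data/images", caption_file="data/captions.json", default_caption="a photo")
print(len(ds))

# Several folders concatenated, each repeated the given number of times (caption files unsupported here).
multi = make_multi_folder_data({"data/cats": 2, "data/dogs": 1})
print(len(multi))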
@@ -119,22 +121,23 @@ class FolderData(Dataset):
         im = im.convert("RGB")
         return self.tform(im)

+
 def hf_dataset(
     name,
     image_transforms=[],
     image_column="img",
     label_column="label",
     text_column="txt",
-    split='train',
-    image_key='image',
-    caption_key='txt',
-    ):
-    """Make huggingface dataset with appropriate list of transforms applied
-    """
+    split="train",
+    image_key="image",
+    caption_key="txt",
+):
+    """Make huggingface dataset with appropriate list of transforms applied"""
     ds = load_dataset(name, split=split)
     image_transforms = [instantiate_from_config(tt) for tt in image_transforms]
-    image_transforms.extend([transforms.ToTensor(),
-                             transforms.Lambda(lambda x: rearrange(x * 2. - 1., 'c h w -> h w c'))])
+    image_transforms.extend(
+        [transforms.ToTensor(), transforms.Lambda(lambda x: rearrange(x * 2.0 - 1.0, "c h w -> h w c"))]
+    )
     tform = transforms.Compose(image_transforms)
     assert image_column in ds.column_names, f"Didn't find column {image_column} in {ds.column_names}"
@@ -144,7 +147,18 @@ def hf_dataset(
         processed = {}
         processed[image_key] = [tform(im) for im in examples[image_column]]

-        label_to_text_dict = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer", 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
+        label_to_text_dict = {
+            0: "airplane",
+            1: "automobile",
+            2: "bird",
+            3: "cat",
+            4: "deer",
+            5: "dog",
+            6: "frog",
+            7: "horse",
+            8: "ship",
+            9: "truck",
+        }

         processed[caption_key] = [label_to_text_dict[label] for label in examples[label_column]]
@@ -153,6 +167,7 @@ def hf_dataset(
     ds.set_transform(pre_process)
     return ds

+
 class TextOnly(Dataset):
     def __init__(self, captions, output_size, image_key="image", caption_key="txt", n_gpus=1):
         """Returns only captions with dummy images"""
@@ -166,7 +181,7 @@ class TextOnly(Dataset):
         if n_gpus > 1:
             # hack to make sure that all the captions appear on each gpu
-            repeated = [n_gpus*[x] for x in self.captions]
+            repeated = [n_gpus * [x] for x in self.captions]
             self.captions = []
             [self.captions.extend(x) for x in repeated]
@@ -175,10 +190,10 @@ class TextOnly(Dataset):
     def __getitem__(self, index):
         dummy_im = torch.zeros(3, self.output_size, self.output_size)
-        dummy_im = rearrange(dummy_im * 2. - 1., 'c h w -> h w c')
+        dummy_im = rearrange(dummy_im * 2.0 - 1.0, "c h w -> h w c")
         return {self.image_key: dummy_im, self.caption_key: self.captions[index]}

     def _load_caption_file(self, filename):
-        with open(filename, 'rt') as f:
+        with open(filename, "rt") as f:
             captions = f.readlines()
-        return [x.strip('\n') for x in captions]
+        return [x.strip("\n") for x in captions]
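
The hf_dataset and TextOnly hunks are likewise formatting-only. A rough usage sketch, assuming the CIFAR-10 column names implied by the label-to-text mapping above (the dataset name and caption strings are illustrative, not taken from this commit):

# Illustrative sketch only: dataset name and captions are assumptions;
# hf_dataset and TextOnly are assumed importable from the same module as above.
ds = hf_dataset("cifar10", split="train")  # defaults: image_column="img", label_column="label"
sample = ds[0]
print(sample["image"].shape, sample["txt"])  # e.g. torch.Size([32, 32, 3]) and a class name

text_ds = TextOnly(captions=["a photo of a cat", "a photo of a dog"], output_size=256)
item = text_ds[0]
print(item["image"].shape, item["txt"])  # dummy (256, 256, 3) tensor plus the caption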