본문 바로가기
Competition

데이콘 Basic 수화 이미지 분류 경진대회 (8등 / 418명, Top 1.91%)

by mean. 2024. 3. 30.
728x90
반응형

timm을 사용하여서 model 구성하여 작성하였습니다.

Code

import warnings
from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2
import os, gc
import timm
import random
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score
import time
from sklearn.model_selection import StratifiedKFold

device = torch.device('cuda:0')
warnings.filterwarnings('ignore')

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

path = 'C:/Users/ideal/Downloads/jupyter/user_data/'

train_png = sorted(glob(path + 'train/*.png'))
test_png = sorted(glob(path + 'test/*.png'))

train_y = pd.read_csv(path +"train.csv")
train_labels = train_y["label"]

label_unique = sorted(np.unique(train_labels))
label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}
train_labels = [label_unique[k] for k in train_labels]

def img_load(path):
    img = cv2.imread(path)[:,:,::-1]
    return img

train_imgs = [img_load(m) for m in tqdm(train_png)]
test_imgs = [img_load(n) for n in tqdm(test_png)]

np.save(path + 'train_imgs_384', np.array(train_imgs))
np.save(path + 'test_imgs_384', np.array(test_imgs))

train_imgs = np.load(path + 'train_imgs_384.npy')
test_imgs = np.load(path + 'test_imgs_384.npy')

meanRGB = [np.mean(x, axis=(0,1)) for x in train_imgs]
stdRGB = [np.std(x, axis=(0,1)) for x in train_imgs]

train_meanR = np.mean([m[0] for m in meanRGB])/255
train_meanG = np.mean([m[1] for m in meanRGB])/255
train_meanB = np.mean([m[2] for m in meanRGB])/255

train_stdR = np.mean([s[0] for s in stdRGB])/255
train_stdG = np.mean([s[1] for s in stdRGB])/255
train_stdB = np.mean([s[2] for s in stdRGB])/255

print("train 평균",train_meanR, train_meanG, train_meanB)
print("train 표준편차",train_stdR, train_stdG, train_stdB)

meanRGB = [np.mean(x, axis=(0,1)) for x in test_imgs]
stdRGB = [np.std(x, axis=(0,1)) for x in test_imgs]

test_meanR = np.mean([m[0] for m in meanRGB])/255
test_meanG = np.mean([m[1] for m in meanRGB])/255
test_meanB = np.mean([m[2] for m in meanRGB])/255

test_stdR = np.mean([s[0] for s in stdRGB])/255
test_stdG = np.mean([s[1] for s in stdRGB])/255
test_stdB = np.mean([s[2] for s in stdRGB])/255

print("test 평균",test_meanR, test_meanG, test_meanB)
print("test 표준편차",test_stdR, test_stdG, test_stdB)

class Custom_dataset(Dataset):
    def __init__(self, img_paths, labels, mode='train'):
        self.img_paths = img_paths
        self.labels = labels
        self.mode=mode
    def __len__(self):
        return len(self.img_paths)
    def __getitem__(self, idx):
        img = self.img_paths[idx]
        if self.mode == 'train':
          train_transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean = [train_meanR, train_meanG, train_meanB],
                                     std = [train_stdR, train_stdG, train_stdB]),
                transforms.RandomAffine((-45, 45)),

            ])
          img = train_transform(img)
        if self.mode == 'test':
          test_transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean = [test_meanR, test_meanG, test_meanB],
                                     std = [test_stdR, test_stdG, test_stdB])
            ])
          img = test_transform(img)


        label = self.labels[idx]
        return img, label

class Network(nn.Module):
    def __init__(self,mode = 'train'):
        super(Network, self).__init__()
        self.mode = mode
        if self.mode == 'train':
          self.model = timm.create_model('vgg19', pretrained=True, num_classes=11)
        if self.mode == 'test':
          self.model = timm.create_model('vgg19', pretrained=True, num_classes=11)

    def forward(self, x):
        x = self.model(x)
        return x

cv = StratifiedKFold(n_splits = 5, random_state = 2022, shuffle = True)
batch_size = 8
epochs = 100
pred_ensemble = []

for idx, (train_idx, val_idx) in enumerate(cv.split(train_imgs, np.array(train_labels))):
  print("----------fold_{} start!----------".format(idx))
  t_imgs, val_imgs = train_imgs[train_idx],  train_imgs[val_idx]
  t_labels, val_labels = np.array(train_labels)[train_idx], np.array(train_labels)[val_idx]

  # Train
  train_dataset = Custom_dataset(np.array(t_imgs), np.array(t_labels), mode='train')
  train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

  # Val
  val_dataset = Custom_dataset(np.array(val_imgs), np.array(val_labels), mode='test')
  val_loader = DataLoader(val_dataset, shuffle=True, batch_size=batch_size)

  gc.collect()
  torch.cuda.empty_cache()
  best=0

  model = Network().to(device)

  optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay = 1e-3)
  criterion = nn.CrossEntropyLoss()
  scaler = torch.cuda.amp.GradScaler()  

  best_f1 = 0
  early_stopping = 0
  for epoch in range(epochs):
    start=time.time()
    train_loss = 0
    train_pred=[]
    train_y=[]
    model.train()
    for batch in (train_loader):
        optimizer.zero_grad()
        x = torch.tensor(batch[0], dtype=torch.float32, device=device)
        y = torch.tensor(batch[1], dtype=torch.long, device=device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        loss = criterion(pred, y)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()/len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()
    train_f1 = score_function(train_y, train_pred)
    state_dict= model.state_dict()
    model.eval()
    with torch.no_grad():
      val_loss = 0 
      val_pred = []
      val_y = []


      for batch in (val_loader):
        x_val = torch.tensor(batch[0], dtype = torch.float32, device = device)
        y_val = torch.tensor(batch[1], dtype=torch.long, device=device)
        with torch.cuda.amp.autocast():
            pred_val = model(x_val)
        loss_val = criterion(pred_val, y_val)

        val_loss += loss_val.item()/len(val_loader)
        val_pred += pred_val.argmax(1).detach().cpu().numpy().tolist()
        val_y += y_val.detach().cpu().numpy().tolist()
      val_f1 = score_function(val_y, val_pred)

      if val_f1 > best_f1:
        best_epoch = epoch
        best_loss = val_loss
        best_f1 = val_f1
        early_stopping = 0

        torch.save({'epoch':epoch,
                    'state_dict':state_dict,
                    'optimizer': optimizer.state_dict(),
                    'scaler': scaler.state_dict(),
             }, path +'best_model_{}.pth'.format(idx))
        print('-----------------SAVE:{} epoch----------------'.format(best_epoch+1))
      else:
          early_stopping += 1

            # Early Stopping
      if early_stopping == 20:
        TIME = time.time() - start
        print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
        print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')
        print(f'Val    loss : {val_loss:.5f}    f1 : {val_f1:.5f}')
        break

    TIME = time.time() - start
    print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    f1 : {train_f1:.5f}')
    print(f'Val    loss : {val_loss:.5f}    f1 : {val_f1:.5f}')

pred_ensemble = []
batch_size = 8
# Test
test_dataset = Custom_dataset(np.array(test_imgs), np.array(["tmp"]*len(test_imgs)), mode='test')
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

for i in range(5):
  model_test = Network(mode = 'test').to(device)
  model_test.load_state_dict(torch.load((path+'best_model_{}.pth'.format(i)))['state_dict'])
  model_test.eval()
  pred_prob = []
  with torch.no_grad():
      for batch in (test_loader):
          x = torch.tensor(batch[0], dtype = torch.float32, device = device)
          with torch.cuda.amp.autocast():
              pred = model_test(x)
              pred_prob.extend(pred.detach().cpu().numpy())
      pred_ensemble.append(pred_prob)

pred = (np.array(pred_ensemble[0])+ np.array(pred_ensemble[1])+ np.array(pred_ensemble[3]) + np.array(pred_ensemble[4]) )/4
f_pred = np.array(pred).argmax(1).tolist()
label_decoder = {val:key for key, val in label_unique.items()}
f_result = [label_decoder[result] for result in f_pred]
submission = pd.read_csv(path + "sample_submission.csv")
submission["label"] = f_result
submission.to_csv(path + "submit.csv", index = False)

Result

https://dacon.io/competitions/official/235896/overview/description

 

수화 이미지 분류 AI 해커톤 - DACON

분석시각화 대회 코드 공유 게시물은 내용 확인 후 좋아요(투표) 가능합니다.

dacon.io

https://dacon.io/competitions/official/235896/codeshare/5049?page=1&dtype=recent

 

[Private 8st, 0.98148] VGG19, Normalize

수화 이미지 분류 AI 해커톤

dacon.io

 

728x90
반응형