Competition

DACON Basic Seoul Landmark Image Classification Competition (3rd place / 420 participants, Top 0.71%)

by mean. 2024. 3. 30.

Code
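
The script below extracts the competition archive, caches the train/test images as numpy arrays, and trains a timm densenet121 classifier with 5-fold StratifiedKFold cross-validation, mixed-precision (AMP) training, and early stopping; the five fold checkpoints are then soft-voted (logit averaging) to build the final submission.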

import os
import gc
import time
import warnings
import zipfile
from glob import glob

import cv2
import numpy
import pandas as pd
import timm
import torch
import torchvision.transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

device = torch.device('cuda:0')
warnings.filterwarnings('ignore')

# Extract the competition archive into ./seoul_dataset
with zipfile.ZipFile('./seoul_dataset.zip') as archive:
    archive.extractall('./seoul_dataset')

label_df = pd.read_csv('./seoul_dataset/train.csv')

def get_train_data(data_dir):
    img_path_list = []
    label_list = []
    img_path_list.extend(glob(os.path.join(data_dir, '*.PNG')))
    img_path_list.sort(key=lambda x:int(x.split('/')[-1].split('.')[0]))
    label_list.extend(label_df['label'])

    return img_path_list, label_list

def get_test_data(data_dir):
    img_path_list = []
    img_path_list.extend(glob(os.path.join(data_dir, '*.PNG')))
    img_path_list.sort(key=lambda x:int(x.split('/')[-1].split('.')[0]))
    #print(img_path_list)

    return img_path_list
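
# Both helpers assume the PNG file names are plain integers, so sorting by int
# keeps the image order aligned with the row order of train.csv.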

train_path, train_label = get_train_data('./seoul_dataset/train')
test_path = get_test_data('./seoul_dataset/test')
# Map each landmark label string to an integer class index
label_unique = sorted(numpy.unique(train_label))
label_unique = {key: value for key, value in zip(label_unique, range(len(label_unique)))}
train_labels = [label_unique[k] for k in train_label]

data_dir = './seoul_dataset/'

def img_load(path):
    # cv2.imread returns BGR; reverse the channel axis to get RGB
    img = cv2.imread(path)[:,:,::-1]
    return img

train_imgs = [img_load(m) for m in tqdm(train_path)]
test_imgs = [img_load(i) for i in tqdm(test_path)]
numpy.save(data_dir + 'train_imgs', numpy.array(train_imgs))
numpy.save(data_dir + 'test_imgs', numpy.array(test_imgs))
train_imgs = numpy.load(data_dir + 'train_imgs.npy')
test_imgs = numpy.load(data_dir + 'test_imgs.npy')
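
# The .npy caching above avoids re-decoding every PNG on subsequent runs.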

def f1_score_function(real, pred):
    # Despite the name, this returns plain accuracy (accuracy_score), not an F1 score
    score = accuracy_score(real, pred)
    return score

class Custom_dataset(Dataset):
    def __init__(self, img_paths, labels, mode='train'):
        self.img_paths = img_paths
        self.labels = labels
        self.mode = mode
    def __len__(self):
        return len(self.img_paths)
    def __getitem__(self, idx):
        img = self.img_paths[idx]
        if self.mode == 'train':
          train_transform = torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Resize((224,224)),
                torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
          img = train_transform(img)
        if self.mode == 'test':
          test_transform = torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Resize((224,224)),
                torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
          img = test_transform(img)

        label = self.labels[idx]
        return img, label
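
# Note: the train and test transforms above are identical
# (resize to 224x224 + ImageNet normalisation); no data augmentation is applied.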

class Network(torch.nn.Module):
    def __init__(self, mode = 'train'):
        super(Network, self).__init__()
        self.mode = mode
        if self.mode == 'train':
          self.model = timm.create_model(model_name, pretrained = True, num_classes = 10)
        if self.mode == 'test':
          self.model = timm.create_model(model_name, pretrained = True, num_classes = 10)
    def forward(self, x):
        x = self.model(x)
        return x
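
# model_name is set below; both branches build the same timm backbone with 10 output classes.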

batch_size = 128
epochs = 100
model_name = "densenet121"
cv = StratifiedKFold(n_splits = 5, random_state = 2022, shuffle = True)
pred_ensemble = []
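# The loop below trains one model per stratified fold with AMP, keeps the checkpoint
# with the best validation accuracy, and stops a fold after 20 epochs without improvement.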

for idx, (train_idx, val_idx) in enumerate(cv.split(train_imgs, numpy.array(train_labels))):
  print("-----------------fold_{} start!----------------".format(idx))
  t_imgs, val_imgs = train_imgs[train_idx], train_imgs[val_idx]
  t_labels, val_labels = numpy.array(train_labels)[train_idx], numpy.array(train_labels)[val_idx]

  train_dataset = Custom_dataset(numpy.array(t_imgs), numpy.array(t_labels), mode = 'train')
  train_loader = DataLoader(train_dataset, shuffle = True, batch_size = batch_size)
  val_dataset = Custom_dataset(numpy.array(val_imgs), numpy.array(val_labels), mode = 'test')
  val_loader = DataLoader(val_dataset, shuffle = True, batch_size = batch_size)

  gc.collect()
  torch.cuda.empty_cache()
  best = 0
  model = Network().to(device)
  optimizer = torch.optim.AdamW(model.parameters(), lr = 1e-4, weight_decay = 1e-3)
  criterion = torch.nn.CrossEntropyLoss()
  scaler = torch.cuda.amp.GradScaler()
  best_f1 = 0
  early_stopping = 0

  for epoch in range(epochs):
    start = time.time()
    train_loss = 0
    train_pred = []
    train_y = []
    model.train()
    for batch in (train_loader):
        optimizer.zero_grad()
        x = torch.tensor(batch[0], dtype=torch.float32, device=device)
        y = torch.tensor(batch[1], dtype=torch.long, device=device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        loss = criterion(pred, y)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()/len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()
    train_f1 = f1_score_function(train_y, train_pred)
    state_dict = model.state_dict()
    model.eval()
    with torch.no_grad():
      val_loss = 0
      val_pred = []
      val_y = []

      for batch in (val_loader):
        x_val = torch.tensor(batch[0], dtype = torch.float32, device = device)
        y_val = torch.tensor(batch[1], dtype = torch.long, device = device)
        with torch.cuda.amp.autocast():
            pred_val = model(x_val)
        loss_val = criterion(pred_val, y_val)

        val_loss += loss_val.item()/len(val_loader)
        val_pred += pred_val.argmax(1).detach().cpu().numpy().tolist()
        val_y += y_val.detach().cpu().numpy().tolist()
      val_f1 = f1_score_function(val_y, val_pred)

      if val_f1 > best_f1:
        best_epoch = epoch
        best_loss = val_loss
        best_f1 = val_f1
        early_stopping = 0
        torch.save({'epoch':epoch,
                    'state_dict':state_dict,
                    'optimizer': optimizer.state_dict(),
                    'scaler': scaler.state_dict(),
             }, data_dir + model_name +'-best_model_{}.pth'.format(idx))
        print('-----------------SAVE:{} epoch----------------'.format(best_epoch+1))
      else:
        early_stopping += 1

      # Early stopping: stop this fold after 20 epochs without validation improvement
      if early_stopping == 20:
        TIME = time.time() - start
        print(f'Epoch : {epoch+1}/{epochs}, time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s, Train loss : {train_loss:.5f}, f1 : {train_f1:.5f}, Val loss : {val_loss:.5f}, f1 : {val_f1:.5f}')
        print("Early stopping")
        break

    TIME = time.time() - start
    print(f'Epoch : {epoch+1}/{epochs}, time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s, Train loss : {train_loss:.5f}, f1 : {train_f1:.5f}, Val loss : {val_loss:.5f}, f1 : {val_f1:.5f}')

# Soft-voting ensemble: average the logits of the five fold checkpoints over the test set
pred_ensemble = []
test_dataset = Custom_dataset(numpy.array(test_imgs), numpy.array(["tmp"]*len(test_imgs)), mode = 'test')  # "tmp" = dummy labels, never used
test_loader = DataLoader(test_dataset, shuffle = False, batch_size = batch_size)
for i in range(5):
  model_test = Network(mode = 'test').to(device)
  model_test.load_state_dict(torch.load((data_dir+ model_name +'-best_model_{}.pth'.format(i)))['state_dict'])
  model_test.eval()
  pred_prob = []
  with torch.no_grad():
      for batch in (test_loader):
          x = torch.tensor(batch[0], dtype = torch.float32, device = device)
          with torch.cuda.amp.autocast():
              pred = model_test(x)
              pred_prob.extend(pred.detach().cpu().numpy())
      pred_ensemble.append(pred_prob)

pred = numpy.mean(numpy.array(pred_ensemble), axis = 0)   # average the five folds' logits
f_pred = pred.argmax(1).tolist()
label_decoder = {val:key for key, val in label_unique.items()}
f_result = [label_decoder[result] for result in f_pred]
submission = pd.read_csv('./seoul_dataset/sample_submission.csv')
submission['label'] = f_result
submission.to_csv('./seoul_dataset/submit.csv', index=False)
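
A quick sanity check of the generated submission before uploading (a minimal, optional sketch; the file paths match the ones used above, and check/sample are just illustrative names):

# Optional: verify submit.csv against the sample submission before uploading
check = pd.read_csv('./seoul_dataset/submit.csv')
sample = pd.read_csv('./seoul_dataset/sample_submission.csv')
assert len(check) == len(sample), 'row count must match the sample submission'
assert list(check.columns) == list(sample.columns), 'column names must match'
print(check['label'].value_counts())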

Result

https://dacon.io/competitions/official/235957/overview/description

 

Seoul Landmark Image Classification AI Hackathon - DACON


https://dacon.io/codeshare/6483

 

ideal9, Private 3rd place, Private score: 1, 5-fold, ensemble


 
