# Google Colab setup: mount Google Drive and inspect the fish image dataset.
import os, shutil
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
from google.colab import drive
# Colab-only: mounts Drive so the dataset under MyDrive becomes readable.
drive.mount('/content/drive', force_remount=True)
# Notebook shell magic (not plain Python): list the dataset directory contents.
!ls -ltr "/content/drive/MyDrive/Colab Notebooks/archive/Fish_Dataset/Fish_Dataset"
# Walk the dataset tree and collect (image path, class label) pairs.
# Folders whose name ends in " GT" hold ground-truth masks and are skipped;
# the class label is the name of the folder containing each image.
data_dir = '/content/drive/MyDrive/Colab Notebooks/archive/Fish_Dataset/Fish_Dataset/'  # renamed from `dir`, which shadowed the builtin

labels = []
paths = []
for dirname, _, filenames in os.walk(data_dir):
    for filename in filenames:
        # Keep only PNG images that are NOT inside a "... GT" mask folder.
        if os.path.splitext(filename)[1] == '.png' and dirname.split()[-1] != 'GT':
            labels.append(os.path.split(dirname)[1])  # class = containing folder name
            paths.append(os.path.join(dirname, filename))

df = pd.DataFrame({'path': paths, 'label': labels})
df.head()
df.info()
# Store the label as a categorical dtype (9 fish species expected).
df['label'] = df['label'].astype('category')
Convert the "label" column to the categorical dtype.
# Per-class image counts; the classes are expected to be balanced.
df['label'].value_counts()
There are 9 categories of fish and each is uniformly distributed, so there is no class-imbalance problem in the data.
Exploratory Data Analysis:
import matplotlib.pyplot as plt
import seaborn as sns

# Show one sample image per fish class in a 3x3 grid.
df['label'].unique()
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(15, 8), constrained_layout=True)
ax = ax.flatten()
for j, cls in enumerate(df['label'].unique()):
    # First image of this class; column 0 of the filtered frame is 'path'.
    ax[j].imshow(plt.imread(df[df['label'] == cls].iloc[0, 0]))
    ax[j].set_title(cls)

# Class-frequency bar plot. Pass the series as a keyword argument: positional
# data arguments were deprecated in seaborn 0.12 and removed in 0.13.
fig = plt.figure(figsize=(15, 8))
sns.countplot(x=df['label'])

# Inspect the raw image dimensions (height, width, channels).
plt.imread(df['path'][1]).shape
Train/Test Split:
from sklearn.model_selection import train_test_split

# 80/20 split, stratified on the class label so train and test keep the same
# class proportions (harmless here since classes are balanced, but safer).
X_train, X_test = train_test_split(df, test_size=0.2, random_state=42,
                                   stratify=df['label'])
print(X_train.shape)
print(X_test.shape)
Creating the image dataset using TensorFlow.
We apply the pretrained network's preprocessing step to the images so that a pretrained backbone can be used for transfer learning.
from tensorflow.keras.applications import ResNet50V2, MobileNetV2
# BUG FIX: the model trained below is MobileNetV2, but the original imported the
# ResNet-v1 (caffe-style, channel-mean) preprocess_input. MobileNetV2 expects
# inputs rescaled to [-1, 1], so use its own preprocessing function.
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 70/30 train/validation split carved out of X_train; the test set is untouched.
trainGen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=0.3)
testGen = ImageDataGenerator(preprocessing_function=preprocess_input)

# flow_from_dataframe defaults to target_size=(256, 256), matching image_shape below.
X_train_img = trainGen.flow_from_dataframe(dataframe=X_train, x_col='path', y_col='label',
                                           class_mode='categorical', subset='training',
                                           color_mode='rgb', batch_size=32)
X_val_img = trainGen.flow_from_dataframe(dataframe=X_train, x_col='path', y_col='label',
                                         class_mode='categorical', subset='validation',
                                         color_mode='rgb', batch_size=32)
# shuffle=False so predictions line up row-for-row with X_test.
X_test_img = testGen.flow_from_dataframe(dataframe=X_test, x_col='path', y_col='label',
                                         class_mode='categorical', color_mode='rgb',
                                         batch_size=32, shuffle=False)
Plotting images after preprocessing:
# Visual sanity check: show the first image from each of six preprocessed test
# batches. Preprocessed pixel values lie outside [0, 1], so imshow will clip or
# shift colors; that is expected for this rough check.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(15, 8))  # was `fit, ax` (typo)
ax = ax.flatten()
for j in range(6):
    # Each batch is (up to 32, 256, 256, 3); batch_size=32 was set above.
    batch_imgs, batch_labels = next(X_test_img)
    ax[j].imshow(batch_imgs[0])
    ax[j].set_title(batch_labels[0])  # one-hot label vector of the first image

X_test_img[0][0].shape
image_shape = (256, 256, 3)

# Class-name -> index mapping; identical across the three generators.
X_train_img.class_indices
X_val_img.class_indices
X_test_img.class_indices
TensorFlow Model:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPooling2D

# Transfer learning: frozen MobileNetV2 backbone (global average pooling output)
# topped with a small trainable classification head.
pre_trained = MobileNetV2(include_top=False, pooling='avg', input_shape=image_shape)
pre_trained.trainable = False  # freeze all backbone weights

# Derive the class count from the data instead of hard-coding 9.
num_classes = len(X_train_img.class_indices)

x = Dense(128, activation='relu')(pre_trained.output)
x = Dropout(0.5)(x)  # regularize the head to limit overfitting
x = Dense(128, activation='relu')(x)
output = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=pre_trained.input, outputs=output)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
from tensorflow.keras.callbacks import EarlyStopping

# BUG FIX: EarlyStopping was imported but never passed to fit(). Wire it in so
# training stops once validation loss stalls and the best weights are restored.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
results = model.fit(X_train_img, epochs=20, validation_data=X_val_img,
                    callbacks=[early_stop])
results
# Learning curves: accuracy and loss for train vs. validation, per epoch.
# (pandas is already imported as pd at the top of the file.)
result = pd.DataFrame(results.history)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(18, 6))
ax = ax.flatten()
ax[0].plot(result[['accuracy', 'val_accuracy']])
ax[0].set_title("Accuracy")
ax[0].legend(['accuracy', 'val_accuracy'])  # label the two curves
ax[1].plot(result[['loss', 'val_loss']])
ax[1].set_title("Loss")
ax[1].legend(['loss', 'val_loss'])
Model Prediction:
import numpy as np

# Predict class probabilities over the (unshuffled) test generator, then take
# the argmax to get integer class ids in the same row order as X_test.
pred = np.argmax(model.predict(X_test_img), axis=1)

pred_df = X_test.copy()
# Invert the class_indices mapping (name -> id) to (id -> name).
labels = {v: k for k, v in X_test_img.class_indices.items()}
pred_df['pred'] = pd.Series(pred, index=pred_df.index).map(labels)

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
print(f"Accuracy Score: {accuracy_score(pred_df['label'], pred_df['pred'])}")
# 'd' formats the integer cell counts (the original '2d' worked but is unusual).
sns.heatmap(confusion_matrix(pred_df['label'], pred_df['pred']), annot=True, fmt='d')
print(pred_df[pred_df['label'] == pred_df['pred']].head(6))
# Display the first six correctly classified test images with their labels.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(15, 8))
ax = ax.flatten()
correct = pred_df[pred_df['label'] == pred_df['pred']].head(6).reset_index()
for i, row in correct.iterrows():
    ax[i].imshow(plt.imread(row['path']))
    ax[i].set_title(row['label'])