Fish Image Classification using CNN
In [1]:
import os, shutil
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
Mounted at /content/drive
In [3]:
!ls -ltr "/content/drive/MyDrive/Colab Notebooks/archive/Fish_Dataset/Fish_Dataset"
total 66
-rw------- 1 root root  2822 Apr 29  2021  README.txt
-rw------- 1 root root  3976 Apr 29  2021  Segmentation_example_script.m
-rw------- 1 root root 18486 Apr 29  2021  license.txt
drwx------ 2 root root  4096 Aug  5 03:50  Trout
drwx------ 2 root root  4096 Aug  5 03:50 'Striped Red Mullet'
drwx------ 2 root root  4096 Aug  5 03:50 'Hourse Mackerel'
drwx------ 2 root root  4096 Aug  5 03:50 'Black Sea Sprat'
drwx------ 2 root root  4096 Aug  5 03:50  Shrimp
drwx------ 2 root root  4096 Aug  5 03:50 'Sea Bass'
drwx------ 2 root root  4096 Aug  5 03:50 'Red Sea Bream'
drwx------ 2 root root  4096 Aug  5 03:50 'Red Mullet'
drwx------ 2 root root  4096 Aug  5 03:50 'Gilt-Head Bream'
drwx------ 2 root root  4096 Aug  5 17:44  fish_dataset_new
In [4]:
dir = '/content/drive/MyDrive/Colab Notebooks/archive/Fish_Dataset/Fish_Dataset/'
label = []
path = []
for dirname, _,filenames in os.walk(dir):
    for filename in filenames:
        if os.path.splitext(filename)[1]=='.png':
            if dirname.split()[-1]!='GT':          
                label.append(os.path.split(dirname)[1])
                path.append(os.path.join(dirname,filename))

df = pd.DataFrame(columns=['path','label'])
df['path']=path
df['label']=label
In [5]:
df.head()
Out[5]:
path label
0 /content/drive/MyDrive/Colab Notebooks/archive... Striped Red Mullet
1 /content/drive/MyDrive/Colab Notebooks/archive... Striped Red Mullet
2 /content/drive/MyDrive/Colab Notebooks/archive... Striped Red Mullet
3 /content/drive/MyDrive/Colab Notebooks/archive... Striped Red Mullet
4 /content/drive/MyDrive/Colab Notebooks/archive... Striped Red Mullet
In [6]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9000 entries, 0 to 8999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   path    9000 non-null   object
 1   label   9000 non-null   object
dtypes: object(2)
memory usage: 140.8+ KB
In [7]:
df['label']=df['label'].astype('category')

Converting the "label" column as category type

In [8]:
df['label'].value_counts()
Out[8]:
Black Sea Sprat       1000
Gilt-Head Bream       1000
Hourse Mackerel       1000
Red Mullet            1000
Red Sea Bream         1000
Sea Bass              1000
Shrimp                1000
Striped Red Mullet    1000
Trout                 1000
Name: label, dtype: int64

There are 9 category of fishes and each are unifromly distrubuted. so no prob on imbalance in the data

EXPLORATORY DATA ALAYSIS :

In [9]:
import matplotlib.pyplot as plt
import seaborn as sns

df['label'].unique()
Out[9]:
['Striped Red Mullet', 'Trout', 'Hourse Mackerel', 'Black Sea Sprat', 'Red Sea Bream', 'Shrimp', 'Gilt-Head Bream', 'Red Mullet', 'Sea Bass']
Categories (9, object): ['Black Sea Sprat', 'Gilt-Head Bream', 'Hourse Mackerel', 'Red Mullet', ...,
                         'Sea Bass', 'Shrimp', 'Striped Red Mullet', 'Trout']
In [10]:
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(15,8), constrained_layout=True)
ax=ax.flatten()
j=0
for i in df['label'].unique():
    
    ax[j].imshow(plt.imread(df[df['label']==i].iloc[0,0]))
    ax[j].set_title(i)
    j=j+1
In [11]:
fig=plt.figure(figsize=(15,8))
sns.countplot(df['label'])
/usr/local/lib/python3.7/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f2c05f3ef90>
In [12]:
plt.imread(df['path'][1]).shape
Out[12]:
(445, 590, 3)

Train Test Split :

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test=train_test_split(df, test_size=0.2, random_state=42)
In [14]:
print(X_train.shape)
print(X_test.shape)
(7200, 2)
(1800, 2)

Creating the image dataset using Tensorflow

we are using resnet_v2 preprocessing step to check if we can use Reset model in trasform learning

In [15]:
from tensorflow.keras.applications import ResNet50V2, MobileNetV2
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

trainGen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=0.3)
testGen =ImageDataGenerator(preprocessing_function= preprocess_input)
X_train_img = trainGen.flow_from_dataframe(dataframe=X_train, x_col='path', y_col='label',class_mode='categorical', subset='training', color_mode='rgb', batch_size=32)
X_val_img = trainGen.flow_from_dataframe(dataframe=X_train, x_col='path', y_col='label',class_mode='categorical', subset='validation', color_mode='rgb', batch_size=32)
X_test_img =testGen.flow_from_dataframe(dataframe=X_test, x_col='path', y_col='label',class_mode='categorical', color_mode='rgb', batch_size=32, shuffle=False)
Found 5040 validated image filenames belonging to 9 classes.
Found 2160 validated image filenames belonging to 9 classes.
Found 1800 validated image filenames belonging to 9 classes.

Plotting image after preprocessing --

In [16]:
fit, ax= plt.subplots(nrows=2, ncols=3, figsize=(15,8))
ax=ax.flatten()
j=0
for _ in range(6):
    img, label = X_test_img.next()
    #print(img.shape)   #  (1,256,256,3)
    ax[j].imshow(img[0],)
    ax[j].set_title(label[0])
    #plt.show()
    j=j+1
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
/usr/local/lib/python3.7/dist-packages/matplotlib/text.py:1165: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if s != self._text:
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
In [17]:
X_test_img[0][0].shape
Out[17]:
(32, 256, 256, 3)
In [18]:
image_shape=(256,256,3)
In [19]:
X_train_img.class_indices
Out[19]:
{'Black Sea Sprat': 0,
 'Gilt-Head Bream': 1,
 'Hourse Mackerel': 2,
 'Red Mullet': 3,
 'Red Sea Bream': 4,
 'Sea Bass': 5,
 'Shrimp': 6,
 'Striped Red Mullet': 7,
 'Trout': 8}
In [20]:
X_val_img.class_indices
Out[20]:
{'Black Sea Sprat': 0,
 'Gilt-Head Bream': 1,
 'Hourse Mackerel': 2,
 'Red Mullet': 3,
 'Red Sea Bream': 4,
 'Sea Bass': 5,
 'Shrimp': 6,
 'Striped Red Mullet': 7,
 'Trout': 8}
In [21]:
X_test_img.class_indices
Out[21]:
{'Black Sea Sprat': 0,
 'Gilt-Head Bream': 1,
 'Hourse Mackerel': 2,
 'Red Mullet': 3,
 'Red Sea Bream': 4,
 'Sea Bass': 5,
 'Shrimp': 6,
 'Striped Red Mullet': 7,
 'Trout': 8}

TENSOR FLOW MODEL :

In [23]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPooling2D
pre_trained= MobileNetV2(include_top=False, pooling='avg', input_shape=image_shape)

#for layers in pre_trained.layers:
#    layers.trainable=False
pre_trained.trainable=False

inp_model = pre_trained.input
#x=Flatten()(pre_trained.output)
x=Dense(128, activation='relu')(pre_trained.output)
x=Dropout(0.5)(x)
x=Dense(128, activation='relu')(x)
output=Dense(9, activation='softmax')(x)
model = Model(inputs=inp_model, outputs=output)




model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

from tensorflow.keras.callbacks import EarlyStopping



results = model.fit(X_train_img,epochs=20, validation_data=X_val_img)
WARNING:tensorflow:`input_shape` is undefined or non-square, or `rows` is not in [96, 128, 160, 192, 224]. Weights for input shape (224, 224) will be loaded as the default.
WARNING:tensorflow:`input_shape` is undefined or non-square, or `rows` is not in [96, 128, 160, 192, 224]. Weights for input shape (224, 224) will be loaded as the default.
Epoch 1/20
158/158 [==============================] - 353s 2s/step - loss: 1.4294 - accuracy: 0.4815 - val_loss: 0.7306 - val_accuracy: 0.7667
Epoch 2/20
158/158 [==============================] - 326s 2s/step - loss: 0.8210 - accuracy: 0.7091 - val_loss: 0.4884 - val_accuracy: 0.8412
Epoch 3/20
158/158 [==============================] - 334s 2s/step - loss: 0.5701 - accuracy: 0.8000 - val_loss: 0.3185 - val_accuracy: 0.8958
Epoch 4/20
158/158 [==============================] - 331s 2s/step - loss: 0.4697 - accuracy: 0.8276 - val_loss: 0.2872 - val_accuracy: 0.8981
Epoch 5/20
158/158 [==============================] - 328s 2s/step - loss: 0.3928 - accuracy: 0.8552 - val_loss: 0.2371 - val_accuracy: 0.9185
Epoch 6/20
158/158 [==============================] - 333s 2s/step - loss: 0.3308 - accuracy: 0.8825 - val_loss: 0.2188 - val_accuracy: 0.9287
Epoch 7/20
158/158 [==============================] - 327s 2s/step - loss: 0.3081 - accuracy: 0.8921 - val_loss: 0.2353 - val_accuracy: 0.9222
Epoch 8/20
158/158 [==============================] - 334s 2s/step - loss: 0.2703 - accuracy: 0.9048 - val_loss: 0.1993 - val_accuracy: 0.9255
Epoch 9/20
158/158 [==============================] - 333s 2s/step - loss: 0.2520 - accuracy: 0.9109 - val_loss: 0.1801 - val_accuracy: 0.9347
Epoch 10/20
158/158 [==============================] - 330s 2s/step - loss: 0.2205 - accuracy: 0.9214 - val_loss: 0.1732 - val_accuracy: 0.9366
Epoch 11/20
158/158 [==============================] - 334s 2s/step - loss: 0.2057 - accuracy: 0.9256 - val_loss: 0.1752 - val_accuracy: 0.9356
Epoch 12/20
158/158 [==============================] - 328s 2s/step - loss: 0.2088 - accuracy: 0.9258 - val_loss: 0.1633 - val_accuracy: 0.9389
Epoch 13/20
158/158 [==============================] - 334s 2s/step - loss: 0.2069 - accuracy: 0.9212 - val_loss: 0.1557 - val_accuracy: 0.9440
Epoch 14/20
158/158 [==============================] - 332s 2s/step - loss: 0.1908 - accuracy: 0.9276 - val_loss: 0.1593 - val_accuracy: 0.9468
Epoch 15/20
158/158 [==============================] - 336s 2s/step - loss: 0.1999 - accuracy: 0.9274 - val_loss: 0.1386 - val_accuracy: 0.9528
Epoch 16/20
158/158 [==============================] - 327s 2s/step - loss: 0.1665 - accuracy: 0.9389 - val_loss: 0.1538 - val_accuracy: 0.9505
Epoch 17/20
158/158 [==============================] - 340s 2s/step - loss: 0.1487 - accuracy: 0.9466 - val_loss: 0.1410 - val_accuracy: 0.9537
Epoch 18/20
158/158 [==============================] - 382s 2s/step - loss: 0.1545 - accuracy: 0.9470 - val_loss: 0.1361 - val_accuracy: 0.9569
Epoch 19/20
158/158 [==============================] - 331s 2s/step - loss: 0.1581 - accuracy: 0.9438 - val_loss: 0.1371 - val_accuracy: 0.9537
Epoch 20/20
158/158 [==============================] - 336s 2s/step - loss: 0.1402 - accuracy: 0.9490 - val_loss: 0.1411 - val_accuracy: 0.9542
In [ ]:
results
In [24]:
import pandas as pd
result = pd.DataFrame(results.history)
fig, ax=plt.subplots(nrows=1, ncols=2,figsize=(18,6))
ax=ax.flatten()
ax[0].plot(result[['accuracy','val_accuracy']])
ax[0].set_title("Accuracy")
ax[1].plot(result[['loss','val_loss']])
ax[1].set_title("Loss")
Out[24]:
Text(0.5, 1.0, 'Loss')

MODEL PREDICTION :

In [26]:
import numpy as np
pred = model.predict(X_test_img)
pred=np.argmax(pred,axis=1)
In [27]:
pred_df=X_test.copy()
labels={}
for l,v in X_test_img.class_indices.items():
    labels.update({v:l})
pred_df['pred']=pred
pred_df['pred']=pred_df['pred'].apply(lambda x: labels[x])
In [28]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
print(f"Accuracy Score: {accuracy_score(pred_df['label'],pred_df['pred'])}")
sns.heatmap(confusion_matrix(pred_df['label'],pred_df['pred']), annot=True, fmt='2d')
Accuracy Score: 0.9572222222222222
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f2bf3643590>
In [29]:
print(pred_df[pred_df['label']==pred_df['pred']].head(6))
fig, ax=plt.subplots(nrows=2, ncols=3, figsize=(15,8))
ax=ax.flatten()
imlist=pred_df[pred_df['label']==pred_df['pred']].head(6).reset_index()
for i in range(0,6):
    ax[i].imshow(plt.imread(imlist['path'][i]))
    ax[i].set_title(imlist['label'][i])
                                                   path               label  \
7940  /content/drive/MyDrive/Colab Notebooks/archive...          Red Mullet   
1162  /content/drive/MyDrive/Colab Notebooks/archive...               Trout   
582   /content/drive/MyDrive/Colab Notebooks/archive...  Striped Red Mullet   
4081  /content/drive/MyDrive/Colab Notebooks/archive...       Red Sea Bream   
8412  /content/drive/MyDrive/Colab Notebooks/archive...            Sea Bass   
8730  /content/drive/MyDrive/Colab Notebooks/archive...            Sea Bass   

                    pred  
7940          Red Mullet  
1162               Trout  
582   Striped Red Mullet  
4081       Red Sea Bream  
8412            Sea Bass  
8730            Sea Bass  
In [ ]: