I am trying to classify videos into 3 different classes. Each video has a different number of frames. The training data has the shape (104, None, 528), where:
- 104 = Number of videos
- None = number of frames per video, which varies from video to video
- 528 = Number of features for each frame
As the sequence of frames for each video is long, I am using a "stateful LSTM" to manage the sequence length. I have defined my model as below:
def LSTM_Model():
    """Build and compile a stateful LSTM classifier.

    The network takes variable-length sequences of 528-dim frame features
    (batch size fixed at 1, as required for statefulness across chunks)
    and emits a softmax over the 3 classes.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # Stateful layer with batch_size=1 so hidden state carries across
    # successive chunks of the same video until reset_states() is called.
    layers = [
        LSTM(units=256, input_shape=(None, 528),
             return_sequences=False, stateful=True, batch_size=1),
        Dropout(0.4),
        Dense(3, activation='softmax'),
    ]
    model = Sequential(layers)

    opt = keras.optimizers.SGD(lr=0.00005, decay=1e-6,
                               momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    model.summary()
    return model
Then I trained the model:
def train_model(X, y, X_test, y_test, model, epochs=100, maxlen=500):
    """Train and evaluate a stateful model on variable-length videos.

    Each video is split into consecutive chunks of at most ``maxlen``
    frames; the model's recurrent state is carried across the chunks of
    one video and reset between videos via ``model.reset_states()``.
    After each training epoch the full test set is evaluated.

    Args:
        X: object array of training videos, each of shape (n_frames, n_features).
        y: one-hot labels for the training videos, shape (n_videos, 3).
        X_test: object array of test videos.
        y_test: one-hot labels for the test videos.
        model: compiled stateful Keras model with batch_size 1.
        epochs: number of training epochs (default 100, as before).
        maxlen: maximum chunk length in frames (default 500, as before).

    Bug fixes vs. the original:
        * the old slicing ``video[count*maxlen + count : ...]`` skipped one
          frame between consecutive chunks (off-by-one in the offset);
        * when ``n_frames`` was an exact multiple of ``maxlen`` the final
          chunk was empty but was still fed to ``train_on_batch``.
    """
    np.random.seed(200)
    for epoch in range(epochs):
        mean_tr_loss, mean_tr_acc = [], []
        print('Epoch: ', epoch + 1)
        for sbj in range(X.shape[0]):
            video = X[sbj]
            y_sbj = y[sbj, :]
            # Chunk without gaps: range(0, n, maxlen) never yields an
            # empty slice, and consecutive chunks are contiguous.
            for start in range(0, video.shape[0], maxlen):
                seq = np.expand_dims(video[start:start + maxlen], axis=0)
                tr_loss, tr_acc = model.train_on_batch(seq, np.array([y_sbj]))
                mean_tr_loss.append(tr_loss)
                mean_tr_acc.append(tr_acc)
            print('Training on subject', sbj + 1, 'done')
            # Forget the previous video's hidden state.
            model.reset_states()
        print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
        print('loss training = {}'.format(np.mean(mean_tr_loss)))
        print('___________________________________')
        print('Testing....')
        mean_te_loss, mean_te_acc = [], []
        for sbj_test in range(X_test.shape[0]):
            video_test = X_test[sbj_test]
            y_new_test = y_test[sbj_test]
            for start in range(0, video_test.shape[0], maxlen):
                seq_test = np.expand_dims(video_test[start:start + maxlen], axis=0)
                te_loss, te_acc = model.test_on_batch(seq_test, np.array([y_new_test]))
                mean_te_loss.append(te_loss)
                mean_te_acc.append(te_acc)
            print('Testing on subject', sbj_test + 1, 'done')
            model.reset_states()
        print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
        print('loss testing = {}'.format(np.mean(mean_te_loss)))
In the above code I considered each video separately; each video was divided into frame sequences of length 500 (except the last sequence of each video, because the number of frames is not divisible by 500). The training accuracy and test accuracy are as below.
Epoch1 : accuracy training = 0.3694 accuracy testing = 0.3927
loss training = 1.146 loss testing = 1.109
Epoch2 : accuracy training = 0.4423 accuracy testing = 0.4048
loss training = 1.053 loss testing = 1.109
Epoch3 : accuracy training = 0.5017 accuracy testing = 0.4236
loss training = 0.994 loss testing = 1.115
Epoch4 : accuracy training = 0.5491 accuracy testing = 0.4099
loss training = 0.94 loss testing = 1.124
Epoch5: accuracy training = 0.5612 accuracy testing = 0.4013
loss training = 0.924 loss testing = 1.128
Epoch6 : accuracy training = 0.6142 accuracy testing = 0.4113
loss training = 0.859 loss testing = 1.137
Epoch7 : accuracy training = 0.6263 accuracy testing = 0.4116
loss training = 0.824 loss testing = 1.142
Epoch8 : accuracy training = 0.6659 accuracy testing = 0.415
loss training = 0.775 loss testing = 1.152
After 100 epochs, training accuracy keeps increasing while testing accuracy does not improve. If the cause is overfitting, adding a dropout layer should help, but it didn't. So I am confused about the cause.
Any idea or suggestion would be appreciated.
0 Answer(s)