# HumanFallDetectionLSTM/algorithms.py
import cv2
import logging
import base64
import time
import numpy as np
import matplotlib.pyplot as plt
from vis.visual import write_on_image, visualise, activity_dict, visualise_tracking
from vis.processor import Processor
from helpers import pop_and_add, last_ip, dist, move_figure, get_hist
from default_params import *
from vis.inv_pendulum import *
import re
import pandas as pd
from scipy.signal import savgol_filter, lfilter
from model.model import LSTMModel
import torch
import math
def get_source(args):
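    """Open the input video (or webcam 0 when args.video is None).

    If the video filename contains exactly five integers, they are assumed to encode
    the Activity, Subject and Trial of a dataset recording, and the matching
    ground-truth rows are loaded from dataset/CompleteDataSet.csv into tagged_df
    (otherwise tagged_df stays None). Returns (cam, tagged_df).
    """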
    tagged_df = None
    if args.video is None:
        cam = cv2.VideoCapture(0)
    else:
        logging.debug(f'Video source: {args.video}')
        cam = cv2.VideoCapture(args.video)
        if isinstance(args.video, str):
            vid = [int(s) for s in re.findall(r'\d+', args.video)]
            if len(vid) == 5:
                tagged_df = pd.read_csv("dataset/CompleteDataSet.csv", usecols=[
                    "TimeStamps", "Subject", "Activity", "Trial", "Tag"], skipinitialspace=True)
                tagged_df = tagged_df.query(
                    f'Subject == {vid[1]} & Activity == {vid[0]} & Trial == {vid[2]}')
    img = cam.read()[1]
    logging.debug(f'Image shape: {img.shape}')
    return cam, tagged_df
def resize(img, resize, resolution):
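    """Return the working frame size and a 16-pixel-aligned (width, height) tuple,
    scaled by args.resolution, for the pose-estimation Processor."""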
    # Resize the video
    if resize is None:
        height, width = img.shape[:2]
    else:
        width, height = [int(dim) for dim in resize.split('x')]
    width_height = (int(width * resolution // 16) * 16,
                    int(height * resolution // 16) * 16)
    return width, height, width_height
def extract_keypoints_parallel(queue, args, self_counter, other_counter, consecutive_frames, event):
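    """Producer process for one camera: read frames, run pose estimation, build the
    per-person keypoint/histogram dicts and push them onto `queue` for the consumer
    (alg2_sequential). Puts None on the queue and sets `event` when the stream ends
    or cannot be opened. With two cameras, the shared frame counters keep the two
    producers roughly in lock-step."""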
    try:
        cam, tagged_df = get_source(args)
        ret_val, img = cam.read()
    except Exception as e:
        queue.put(None)
        event.set()
        print('Exception occurred:', e)
        print('Most likely that the video/camera doesn\'t exist')
        return

    width, height, width_height = resize(img, args.resize, args.resolution)
    logging.debug(f'Target width and height = {width_height}')
    processor_singleton = Processor(width_height, args)

    output_video = None
    frame = 0
    fps = 0
    t0 = time.time()

    while not event.is_set():
        # print(args.video, self_counter.value, other_counter.value, sep=" ")
        if args.num_cams == 2 and (self_counter.value > other_counter.value):
            continue
        ret_val, img = cam.read()
        frame += 1
        self_counter.value += 1

        if tagged_df is None:
            curr_time = time.time()
        else:
            curr_time = tagged_df.iloc[frame-1]['TimeStamps'][11:]
            curr_time = sum(x * float(t) for x, t in zip([3600, 60, 1], curr_time.split(":")))

        if img is None:
            print('no more images captured')
            print(args.video, curr_time, sep=" ")
            if not event.is_set():
                event.set()
            break

        img = cv2.resize(img, (width, height))
        hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        keypoint_sets, bb_list, width_height = processor_singleton.single_image(img)
        assert bb_list is None or (type(bb_list) == list)
        if bb_list:
            assert type(bb_list[0]) == tuple
            assert type(bb_list[0][0]) == tuple
        # bb_list is assumed to be of the form [((x1, y1), (x2, y2)), ...]

        if args.coco_points:
            keypoint_sets = [keypoints.tolist() for keypoints in keypoint_sets]
        else:
            anns = [get_kp(keypoints.tolist()) for keypoints in keypoint_sets]
            ubboxes = [(np.asarray([width, height])*np.asarray(ann[1])).astype('int32')
                       for ann in anns]
            lbboxes = [(np.asarray([width, height])*np.asarray(ann[2])).astype('int32')
                       for ann in anns]
            bbox_list = [(np.asarray([width, height])*np.asarray(box)).astype('int32') for box in bb_list]
            uhist_list = [get_hist(hsv_img, bbox) for bbox in ubboxes]
            lhist_list = [get_hist(img, bbox) for bbox in lbboxes]
            keypoint_sets = [{"keypoints": keyp[0], "up_hist": uh, "lo_hist": lh, "time": curr_time, "box": box}
                             for keyp, uh, lh, box in zip(anns, uhist_list, lhist_list, bbox_list)]

            cv2.polylines(img, ubboxes, True, (255, 0, 0), 2)
            cv2.polylines(img, lbboxes, True, (0, 255, 0), 2)
            for box in bbox_list:
                cv2.rectangle(img, tuple(box[0]), tuple(box[1]), (0, 0, 255), 2)

        dict_vis = {"img": img, "keypoint_sets": keypoint_sets, "width": width, "height": height,
                    "vis_keypoints": args.joints, "vis_skeleton": args.skeleton,
                    "CocoPointsOn": args.coco_points,
                    "tagged_df": {"text": f"Avg FPS: {frame//(time.time()-t0)}, Frame: {frame}",
                                  "color": [0, 0, 0]}}
        queue.put(dict_vis)

    queue.put(None)
    return
###################################################### Post human estimation ###########################################################
def show_tracked_img(img_dict, ip_set, num_matched, output_video, args):
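    """Overlay the tracked skeletons and status text on one camera's frame; when
    args.save_output is set, lazily create the cv2.VideoWriter on first use and
    append subsequent frames to it. Returns the annotated image and the writer."""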
    img = img_dict["img"]
    tagged_df = img_dict["tagged_df"]
    keypoints_frame = [person[-1] for person in ip_set]
    img = visualise_tracking(img=img, keypoint_sets=keypoints_frame, width=img_dict["width"],
                             height=img_dict["height"], num_matched=num_matched,
                             vis_keypoints=img_dict["vis_keypoints"],
                             vis_skeleton=img_dict["vis_skeleton"], CocoPointsOn=False)
    img = write_on_image(img=img, text=tagged_df["text"], color=tagged_df["color"])

    if output_video is None:
        if args.save_output:
            vidname = args.video.split('/')
            output_video = cv2.VideoWriter(filename='/'.join(vidname[:-1])+'/out'+vidname[-1][:-3]+'avi',
                                           fourcc=cv2.VideoWriter_fourcc(*'MP42'),
                                           fps=args.fps, frameSize=img.shape[:2][::-1])
            logging.debug(
                f'Saving the output video at {args.out_path} with {args.fps} frames per second')
        else:
            output_video = None
            logging.debug('Not saving the output video')
    else:
        output_video.write(img)

    return img, output_video
def remove_wrongly_matched(matched_1, matched_2):
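    """Return the indices of previously matched cross-camera pairs whose upper-body
    colour histograms no longer agree (correlation below 0.5*HIST_THRESH), so the
    caller can demote them back to the unmatched pool."""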
    unmatched_idxs = []
    i = 0
    for ip1, ip2 in zip(matched_1, matched_2):
        # each of these is a set of the last t frames of each matched person
        correlation = cv2.compareHist(last_valid_hist(ip1)["up_hist"], last_valid_hist(ip2)["up_hist"],
                                      cv2.HISTCMP_CORREL)
        if correlation < 0.5*HIST_THRESH:
            unmatched_idxs.append(i)
        i += 1

    return unmatched_idxs
def match_unmatched(unmatched_1, unmatched_2, lstm_set1, lstm_set2, num_matched):
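    """Match the still-unmatched people of camera 1 and camera 2 by upper-body
    histogram correlation, using a Gale-Shapley style proposal loop; only pairs whose
    correlation exceeds HIST_THRESH are kept. Returns the index pairs (offset by
    num_matched) and the corresponding ip/lstm entries for the new matches."""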
    new_matched_1 = []
    new_matched_2 = []
    new_lstm1 = []
    new_lstm2 = []
    final_pairs = [[], []]

    if not unmatched_1 or not unmatched_2:
        return final_pairs, new_matched_1, new_matched_2, new_lstm1, new_lstm2

    new_matched = 0
    correlation_matrix = -np.ones((len(unmatched_1), len(unmatched_2)))
    dist_matrix = np.zeros((len(unmatched_1), len(unmatched_2)))
    for i in range(len(unmatched_1)):
        for j in range(len(unmatched_2)):
            correlation_matrix[i][j] = cv2.compareHist(last_valid_hist(unmatched_1[i])["up_hist"],
                                                       last_valid_hist(unmatched_2[j])["up_hist"],
                                                       cv2.HISTCMP_CORREL)
            dist_matrix[i][j] = np.sum(np.absolute(last_valid_hist(unmatched_1[i])["up_hist"] -
                                                   last_valid_hist(unmatched_2[j])["up_hist"]))

    freelist_1 = [i for i in range(len(unmatched_1))]
    pair_21 = [-1]*len(unmatched_2)
    unmatched_1_preferences = np.argsort(-correlation_matrix, axis=1)
    # print("cor", correlation_matrix, sep="\n")
    # print("unmatched_1", unmatched_1_preferences, sep="\n")
    unmatched_indexes1 = [0]*len(unmatched_1)
    finish_array = [False]*len(unmatched_1)

    # Gale-Shapley style proposals: each free person from camera 1 proposes, in
    # decreasing order of histogram correlation, to people from camera 2; a camera-2
    # person keeps whichever proposer correlates better.
    while freelist_1:
        um1_idx = freelist_1[-1]
        if finish_array[um1_idx]:
            freelist_1.pop()
            continue
        next_unasked_2 = unmatched_1_preferences[um1_idx][unmatched_indexes1[um1_idx]]
        if pair_21[next_unasked_2] == -1:
            pair_21[next_unasked_2] = um1_idx
            freelist_1.pop()
        else:
            curr_paired_2 = pair_21[next_unasked_2]
            if correlation_matrix[curr_paired_2][next_unasked_2] < correlation_matrix[um1_idx][next_unasked_2]:
                pair_21[next_unasked_2] = um1_idx
                freelist_1.pop()
                if not finish_array[curr_paired_2]:
                    freelist_1.append(curr_paired_2)
        unmatched_indexes1[um1_idx] += 1
        if unmatched_indexes1[um1_idx] == len(unmatched_2):
            finish_array[um1_idx] = True

    for j, i in enumerate(pair_21):
        if i == -1:
            # nobody proposed to this camera-2 person
            continue
        if correlation_matrix[i][j] > HIST_THRESH:
            final_pairs[0].append(i+num_matched)
            final_pairs[1].append(j+num_matched)
            new_matched_1.append(unmatched_1[i])
            new_matched_2.append(unmatched_2[j])
            new_lstm1.append(lstm_set1[i])
            new_lstm2.append(lstm_set2[j])
    # print("finalpairs", final_pairs, sep="\n")

    return final_pairs, new_matched_1, new_matched_2, new_lstm1, new_lstm2
def alg2_sequential(queues, argss, consecutive_frames, event):
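    """Consumer process: pull per-frame keypoint dicts from the producer queues, track
    people over time (and across both cameras when num_cams == 2), run the LSTM
    activity classifier via get_all_features, and display/optionally save the
    annotated frames until a producer sends None or the user closes a window."""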
    model = LSTMModel(h_RNN=48, h_RNN_layers=2, drop_p=0.1, num_classes=7)
    model.load_state_dict(torch.load('model/lstm_weights.sav', map_location=argss[0].device))
    model.eval()
    output_videos = [None for _ in range(argss[0].num_cams)]
    t0 = time.time()
    feature_plotters = [[[], [], [], [], []] for _ in range(argss[0].num_cams)]
    ip_sets = [[] for _ in range(argss[0].num_cams)]
    lstm_sets = [[] for _ in range(argss[0].num_cams)]
    max_length_mat = 300
    num_matched = 0
    if not argss[0].plot_graph:
        max_length_mat = consecutive_frames
    else:
        f, ax = plt.subplots()
        move_figure(f, 800, 100)

    window_names = [args.video if isinstance(args.video, str) else 'Cam '+str(args.video) for args in argss]
    [cv2.namedWindow(window_name) for window_name in window_names]

    while True:
        # if not queue1.empty() and not queue2.empty():
        if not any(q.empty() for q in queues):
            dict_frames = [q.get() for q in queues]

            if any([(dict_frame is None) for dict_frame in dict_frames]):
                if not event.is_set():
                    event.set()
                break

            if cv2.waitKey(1) == 27 or any(cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1
                                           for window_name in window_names):
                if not event.is_set():
                    event.set()

            kp_frames = [dict_frame["keypoint_sets"] for dict_frame in dict_frames]
            if argss[0].num_cams == 1:
                num_matched, new_num, indxs_unmatched = match_ip(ip_sets[0], kp_frames[0], lstm_sets[0],
                                                                 num_matched, max_length_mat)
                valid1_idxs, prediction = get_all_features(ip_sets[0], lstm_sets[0], model)
                dict_frames[0]["tagged_df"]["text"] += f" Pred: {activity_dict[prediction+5]}"
                img, output_videos[0] = show_tracked_img(dict_frames[0], ip_sets[0], num_matched,
                                                         output_videos[0], argss[0])
                # print(img1.shape)
                cv2.imshow(window_names[0], img)
            elif argss[0].num_cams == 2:
                num_matched, new_num, indxs_unmatched1 = match_ip(ip_sets[0], kp_frames[0], lstm_sets[0],
                                                                  num_matched, max_length_mat)
                assert new_num == len(ip_sets[0])
                # move the people camera 1 failed to match to the end of camera 2's lists
                # so that the matched prefixes of both cameras stay aligned
                for i in sorted(indxs_unmatched1, reverse=True):
                    elem = ip_sets[1][i]
                    ip_sets[1].pop(i)
                    ip_sets[1].append(elem)
                    elem_lstm = lstm_sets[1][i]
                    lstm_sets[1].pop(i)
                    lstm_sets[1].append(elem_lstm)

                num_matched, new_num, indxs_unmatched2 = match_ip(ip_sets[1], kp_frames[1], lstm_sets[1],
                                                                  num_matched, max_length_mat)
                for i in sorted(indxs_unmatched2, reverse=True):
                    elem = ip_sets[0][i]
                    ip_sets[0].pop(i)
                    ip_sets[0].append(elem)
                    elem_lstm = lstm_sets[0][i]
                    lstm_sets[0].pop(i)
                    lstm_sets[0].append(elem_lstm)

                matched_1 = ip_sets[0][:num_matched]
                matched_2 = ip_sets[1][:num_matched]

                unmatch_previous = remove_wrongly_matched(matched_1, matched_2)
                if unmatch_previous:
                    print(unmatch_previous)

                # demote pairs whose histograms drifted apart back to the unmatched tail
                for i in sorted(unmatch_previous, reverse=True):
                    elem1 = ip_sets[0][i]
                    elem2 = ip_sets[1][i]
                    ip_sets[0].pop(i)
                    ip_sets[1].pop(i)
                    ip_sets[0].append(elem1)
                    ip_sets[1].append(elem2)
                    elem_lstm1 = lstm_sets[0][i]
                    lstm_sets[0].pop(i)
                    lstm_sets[0].append(elem_lstm1)
                    elem_lstm2 = lstm_sets[1][i]
                    lstm_sets[1].pop(i)
                    lstm_sets[1].append(elem_lstm2)
                    num_matched -= 1

                unmatched_1 = ip_sets[0][num_matched:]
                unmatched_2 = ip_sets[1][num_matched:]

                new_pairs, new_matched1, new_matched2, new_lstm1, new_lstm2 = match_unmatched(
                    unmatched_1, unmatched_2, lstm_sets[0], lstm_sets[1], num_matched)

                new_p1 = new_pairs[0]
                new_p2 = new_pairs[1]

                for i in sorted(new_p1, reverse=True):
                    ip_sets[0].pop(i)
                    lstm_sets[0].pop(i)
                for i in sorted(new_p2, reverse=True):
                    ip_sets[1].pop(i)
                    lstm_sets[1].pop(i)

                ip_sets[0] = ip_sets[0][:num_matched] + new_matched1 + ip_sets[0][num_matched:]
                ip_sets[1] = ip_sets[1][:num_matched] + new_matched2 + ip_sets[1][num_matched:]
                lstm_sets[0] = lstm_sets[0][:num_matched] + new_lstm1 + lstm_sets[0][num_matched:]
                lstm_sets[1] = lstm_sets[1][:num_matched] + new_lstm2 + lstm_sets[1][num_matched:]
                # remember to match the energy matrices also
                num_matched = num_matched + len(new_matched1)

                # get features now
                valid1_idxs, prediction1 = get_all_features(ip_sets[0], lstm_sets[0], model)
                valid2_idxs, prediction2 = get_all_features(ip_sets[1], lstm_sets[1], model)
                dict_frames[0]["tagged_df"]["text"] += f" Pred: {activity_dict[prediction1+5]}"
                dict_frames[1]["tagged_df"]["text"] += f" Pred: {activity_dict[prediction2+5]}"
                img1, output_videos[0] = show_tracked_img(dict_frames[0], ip_sets[0], num_matched,
                                                          output_videos[0], argss[0])
                img2, output_videos[1] = show_tracked_img(dict_frames[1], ip_sets[1], num_matched,
                                                          output_videos[1], argss[1])
                # print(img1.shape)
                cv2.imshow(window_names[0], img1)
                cv2.imshow(window_names[1], img2)
                assert len(lstm_sets[0]) == len(ip_sets[0])
                assert len(lstm_sets[1]) == len(ip_sets[1])

            DEBUG = False
            # for ip_set, feature_plotter in zip(ip_sets, feature_plotters):
            #     for cnt in range(len(FEATURE_LIST)):
            #         plt_f = FEATURE_LIST[cnt]
            #         if ip_set and ip_set[0] is not None and ip_set[0][-1] is not None and plt_f in ip_set[0][-1]["features"]:
            #             # print(ip_set[0][-1]["features"])
            #             feature_plotter[cnt].append(ip_set[0][-1]["features"][plt_f])
            #         else:
            #             # print("None")
            #             feature_plotter[cnt].append(0)
            # DEBUG = True

    cv2.destroyAllWindows()
    # for feature_plotter in feature_plotters:
    #     for i, feature_arr in enumerate(feature_plotter):
    #         plt.clf()
    #         x = np.linspace(1, len(feature_arr), len(feature_arr))
    #         axes = plt.gca()
    #         filter_array = feature_arr
    #         line, = axes.plot(x, filter_array, 'r-')
    #         plt.ylabel(FEATURE_LIST[i])
    #         # plt.savefig(f'{args1.video}_{FEATURE_LIST[i]}_filter.png')
    #         plt.pause(1e-7)
    # for i, feature_arr in enumerate(feature_plotter2):
    #     plt.clf()
    #     x = np.linspace(1, len(feature_arr), len(feature_arr))
    #     axes = plt.gca()
    #     filter_array = feature_arr
    #     line, = axes.plot(x, filter_array, 'r-')
    #     plt.ylabel(FEATURE_LIST[i])
    #     # plt.savefig(f'{args2.video}_{FEATURE_LIST[i]}_filter.png')
    #     plt.pause(1e-7)
    # if len(re_matrix1[0]) > 0:
    #     print(np.linalg.norm(ip_sets[0][0][-1][0]['B']-ip_sets[0][0][-1][0]['H']))
    # print("P2 Over")
    del model
    return
def get_all_features(ip_set, lstm_set, model):
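    """For every tracked person, compute the per-frame features (bounding-box height
    and ratio, vertical angle, rotational energy, the inverse-pendulum gf term), feed
    them to the LSTM while carrying its hidden state in lstm_set[i][0], and apply the
    heuristics that veto a class-0 prediction when the person still looks upright or
    the confidence is low. Returns (valid_idxs, prediction for the first tracked
    person, or 15 if nobody is tracked)."""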
    valid_idxs = []
    invalid_idxs = []
    predictions = [15]*len(ip_set)  # 15 is the tag for None

    for i, ips in enumerate(ip_set):
        # ip set for a particular person
        last1 = None
        last2 = None
        for j in range(-2, -1*DEFAULT_CONSEC_FRAMES - 1, -1):
            if ips[j] is not None:
                if last1 is None:
                    last1 = j
                elif last2 is None:
                    last2 = j

        if ips[-1] is None:
            invalid_idxs.append(i)
            # continue
        else:
            ips[-1]["features"] = {}
            # get re, gf, angle, bounding box ratio, ratio derivative
            ips[-1]["features"]["height_bbox"] = get_height_bbox(ips[-1])
            ips[-1]["features"]["ratio_bbox"] = FEATURE_SCALAR["ratio_bbox"]*get_ratio_bbox(ips[-1])

            body_vector = ips[-1]["keypoints"]["N"] - ips[-1]["keypoints"]["B"]
            ips[-1]["features"]["angle_vertical"] = FEATURE_SCALAR["angle_vertical"]*get_angle_vertical(body_vector)
            # print(ips[-1]["features"]["angle_vertical"])
            ips[-1]["features"]["log_angle"] = FEATURE_SCALAR["log_angle"]*np.log(1 + np.abs(ips[-1]["features"]["angle_vertical"]))

            if last1 is None:
                invalid_idxs.append(i)
                # continue
            else:
                ips[-1]["features"]["re"] = FEATURE_SCALAR["re"]*get_rot_energy(ips[last1], ips[-1])
                ips[-1]["features"]["ratio_derivative"] = FEATURE_SCALAR["ratio_derivative"]*get_ratio_derivative(ips[last1], ips[-1])

                if last2 is None:
                    invalid_idxs.append(i)
                    # continue
                else:
                    ips[-1]["features"]["gf"] = get_gf(ips[last2], ips[last1], ips[-1])
                    valid_idxs.append(i)

        # build the LSTM input vector, padding with zeros for features that could not be computed
        xdata = []
        if ips[-1] is None:
            if last1 is None:
                xdata = [0]*len(FEATURE_LIST)
            else:
                for feat in FEATURE_LIST[:FRAME_FEATURES]:
                    xdata.append(ips[last1]["features"][feat])
                xdata += [0]*(len(FEATURE_LIST)-FRAME_FEATURES)
        else:
            for feat in FEATURE_LIST:
                if feat in ips[-1]["features"]:
                    xdata.append(ips[-1]["features"][feat])
                else:
                    xdata.append(0)

        xdata = torch.Tensor(xdata).view(-1, 1, 5)
        # what if ips[-2] is None?
        outputs, lstm_set[i][0] = model(xdata, lstm_set[i][0])

        if i == 0:
            prediction = torch.max(outputs.data, 1)[1][0].item()
            confidence = torch.max(outputs.data, 1)[0][0].item()

            fpd = True
            # fpd = False
            if fpd:
                if prediction in [1, 2, 3, 5]:
                    lstm_set[i][3] -= 1
                    lstm_set[i][3] = max(lstm_set[i][3], 0)
                    if lstm_set[i][2] < EMA_FRAMES:
                        if ips[-1] is not None:
                            lstm_set[i][2] += 1
                            lstm_set[i][1] = (lstm_set[i][1]*(lstm_set[i][2]-1) + get_height_bbox(ips[-1]))/lstm_set[i][2]
                    else:
                        if ips[-1] is not None:
                            lstm_set[i][1] = (1-EMA_BETA)*get_height_bbox(ips[-1]) + EMA_BETA*lstm_set[i][1]
                elif prediction == 0:
                    if (ips[-1] is not None and lstm_set[i][1] != 0 and
                            abs(ips[-1]["features"]["angle_vertical"]) < math.pi/4) or confidence < 0.4:
                        # (get_height_bbox(ips[-1]) > 2*lstm_set[i][1]/3 or abs(ips[-1]["features"]["angle_vertical"]) < math.pi/4):
                        prediction = 7
                    else:
                        lstm_set[i][3] += 1
                        if lstm_set[i][3] < DEFAULT_CONSEC_FRAMES//4:
                            prediction = 7
                else:
                    lstm_set[i][3] -= 1
                    lstm_set[i][3] = max(lstm_set[i][3], 0)

            predictions[i] = prediction

    return valid_idxs, predictions[0] if len(predictions) > 0 else 15
def get_frame_features(ip_set, new_frame, re_matrix, gf_matrix, num_matched, max_length_mat=DEFAULT_CONSEC_FRAMES):
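    """Thin wrapper around match_ip; note the early return, which leaves the
    rotational-energy / gf accumulation code below it unreachable."""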
    match_ip(ip_set, new_frame, re_matrix, gf_matrix, max_length_mat)
    return

    for i in range(len(ip_set)):
        if ip_set[i][-1] is not None:
            if ip_set[i][-2] is not None:
                pop_and_add(re_matrix[i], get_rot_energy(
                    ip_set[i][-2], ip_set[i][-1]), max_length_mat)
            elif ip_set[i][-3] is not None:
                pop_and_add(re_matrix[i], get_rot_energy(
                    ip_set[i][-3], ip_set[i][-1]), max_length_mat)
            elif ip_set[i][-4] is not None:
                pop_and_add(re_matrix[i], get_rot_energy(
                    ip_set[i][-4], ip_set[i][-1]), max_length_mat)
            else:
                pop_and_add(re_matrix[i], 0, max_length_mat)
        else:
            pop_and_add(re_matrix[i], 0, max_length_mat)

    for i in range(len(ip_set)):
        if ip_set[i][-1] is not None:
            last1 = None
            last2 = None
            for j in [-2, -3, -4, -5]:
                if ip_set[i][j] is not None:
                    if last1 is None:
                        last1 = j
                    elif last2 is None:
                        last2 = j

            if last2 is None:
                pop_and_add(gf_matrix[i], 0, max_length_mat)
                continue

            pop_and_add(gf_matrix[i], get_gf(ip_set[i][last2], ip_set[i][last1],
                                             ip_set[i][-1]), max_length_mat)
        else:
            pop_and_add(gf_matrix[i], 0, max_length_mat)

    return