|
|
import os
|
|
|
import time
|
|
|
import numpy as np
|
|
|
import cv2
|
|
|
import mediapipe as mp
|
|
|
from prediction import predict_from_image
|
|
|
from PredictWord import PredictWord, clear_notepad_file
|
|
|
|
|
|
# Folder that holds the toolbar ("header") images shown along the top of the frame.
Header_path = "Assets/header"

# os.listdir() returns entries in arbitrary order, while main() picks header
# images by index (0 and 1). Sorting makes that mapping stable across runs and
# platforms. A missing folder yields an empty list, which main() handles by
# simply not drawing a header.
myList = sorted(os.listdir(Header_path)) if os.path.isdir(Header_path) else []

# Default webcam (device index 0); released by the script entry point.
cam = cv2.VideoCapture(0)

# Working frame size in pixels (width, height); every captured frame is resized to it.
wCam, hCam = 1280, 720
|
|
|
|
|
|
|
|
|
class HandDetector:
    """Wrapper around MediaPipe Hands that exposes landmarks and raised fingers.

    Typical use per frame: ``findHands(img)`` -> ``findPosition(img)`` ->
    ``fingerup()``.

    Attributes:
        results: Output of the most recent ``findHands`` call, or ``None``
            if ``findHands`` has not been called yet.
        lmList: ``[index, x_px, y_px]`` entries produced by the most recent
            ``findPosition`` call (empty when no hand was found).
        tipIds: Landmark indices treated as the five fingertips
            (thumb first, little finger last).
    """

    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.8, trackCon=0.8):
        """Create the underlying MediaPipe Hands solution.

        Args:
            mode: Forwarded as ``static_image_mode``.
            maxHands: Forwarded as ``max_num_hands``.
            modelComplexity: Forwarded as ``model_complexity``.
            detectionCon: Forwarded as ``min_detection_confidence``.
            trackCon: Forwarded as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplexity = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # Keyword arguments keep each value bound to the intended option even
        # if the positional order of Hands() differs between MediaPipe releases.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils

        self.tipIds = [4, 8, 12, 16, 20]
        self.lmList = []
        # Defined up front so findPosition() is safe before the first findHands().
        self.results = None

    def findHands(self, img):
        """Run hand detection on a BGR frame and draw the landmarks onto it.

        Args:
            img: BGR image; it is drawn on in place.

        Returns:
            The same ``img`` object, with hand connections drawn for every
            detected hand.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0):
        """Convert one detected hand's landmarks to pixel coordinates.

        Args:
            img: Image whose height/width scale the normalised landmarks.
            handNo: Index of the hand within the latest detection results.

        Returns:
            A list of ``[index, x_px, y_px]`` entries, also stored in
            ``self.lmList``. Empty when ``findHands`` has not run, no hand
            was detected, or ``handNo`` does not refer to a detected hand.
        """
        self.lmList = []
        detected = self.results.multi_hand_landmarks if self.results is not None else None
        if not detected:
            return self.lmList

        try:
            hand = detected[handNo]
        except IndexError:
            # Requested hand is not in this frame; report "no landmarks".
            return self.lmList

        # Only height and width are needed, so any channel count is accepted.
        height, width = img.shape[:2]
        self.lmList = [
            [idx, int(lm.x * width), int(lm.y * height)]
            for idx, lm in enumerate(hand.landmark)
        ]
        return self.lmList

    def fingerup(self):
        """Report which fingers are raised, based on ``self.lmList``.

        The thumb counts as raised when its tip lies at a smaller x than the
        landmark just before it; each other finger counts as raised when its
        tip lies at a smaller y (higher in the image) than the landmark two
        positions before it.

        Returns:
            Five 0/1 flags ordered like ``tipIds``, or an empty list when
            ``self.lmList`` does not contain every fingertip landmark.
        """
        if len(self.lmList) <= max(self.tipIds):
            return []

        thumb_tip = self.tipIds[0]
        fingers = [1 if self.lmList[thumb_tip][1] < self.lmList[thumb_tip - 1][1] else 0]
        for tip in self.tipIds[1:]:
            fingers.append(1 if self.lmList[tip][2] < self.lmList[tip - 2][2] else 0)
        return fingers
|
|
|
|
|
|
def _load_resized(path, size):
    """Read the image at *path* and resize it to *size* (width, height); None if unreadable."""
    image = cv2.imread(path)
    if image is None:
        return None
    return cv2.resize(image, size)


def _draw_stroke(img, canvas, start, end, color):
    """Draw one stroke segment on *canvas*; for the black eraser also mark the cursor on *img*."""
    if color == (0, 0, 0):
        cv2.circle(img, end, 30, color, cv2.FILLED)
        cv2.line(canvas, start, end, color, 75)
    else:
        cv2.line(canvas, start, end, color, 15)


def main():
    """Run the webcam drawing / handwriting-prediction loop until 'q' is pressed.

    Gestures (evaluated on the first detected hand):
      * index + middle finger raised -> selection: pick pen/eraser from the
        header strip, or press a button in the right sidebar;
      * index raised, middle lowered -> draw (or erase) on the canvas.

    In "Prediction Mode" a left sidebar is shown; holding a raised index
    fingertip over its buttons saves the canvas and runs letter or word
    prediction.

    The loop also stops when the camera stops delivering frames. Releasing
    the camera is left to the caller.
    """
    detector = HandDetector()

    # Header images are resized once here instead of on every selection.
    headers = []
    for file_name in myList:
        header_img = _load_resized(f'{Header_path}/{file_name}', (1280, 125))
        if header_img is not None:
            headers.append(header_img)
    header = headers[0] if headers else None

    # Missing sidebar art is tolerated: the corresponding overlay is skipped.
    right_bar = _load_resized('Assets/sidebar/right.png', (230, 595))
    left_bar = _load_resized('Assets/sidebar/left.png', (226, 300))

    draw_color = (0, 0, 255)  # BGR red pen; (0, 0, 0) acts as the eraser
    mode = "Drawing Mode"
    canvas = np.zeros((hCam, wCam, 3), np.uint8)
    xp, yp = 0, 0  # previous stroke point; (0, 0) means "no stroke in progress"
    last_frame_time = 0

    # NOTE(review): presumably resets the transcript file written by
    # save_and_speak_word — confirm against PredictWord.
    clear_notepad_file(output_dir='output', filename='output.txt')
    # cv2.imwrite does not create missing folders.
    os.makedirs("Output", exist_ok=True)

    while True:
        success, img = cam.read()
        if not success or img is None:
            print("Unable to read a frame from the camera; stopping.")
            break

        img = cv2.flip(cv2.resize(img, (wCam, hCam)), 1)
        img = detector.findHands(img)
        lmlist = detector.findPosition(img)

        fingers = detector.fingerup() if lmlist else []
        hand_visible = len(lmlist) > 12 and len(fingers) >= 3
        index_up = hand_visible and fingers[1] == 1
        middle_up = hand_visible and fingers[2] == 1
        selecting = index_up and middle_up
        drawing = index_up and not middle_up

        if hand_visible:
            x1, y1 = lmlist[8][1:3]   # index fingertip
            x2, y2 = lmlist[12][1:3]  # middle fingertip

        if not drawing:
            # End the current stroke so the next one does not connect to a
            # stale point (e.g. after the hand left the frame).
            xp, yp = 0, 0

        if selecting:
            if y1 < 125:
                # Header strip: left area selects the pen, right area the eraser.
                if 0 < x1 < 271:
                    draw_color = (0, 0, 255)
                    if headers:
                        header = headers[0]
                elif 850 < x1 < 1280:
                    draw_color = (0, 0, 0)
                    if len(headers) > 1:
                        header = headers[1]

            cv2.rectangle(img, (x1, y1 - 25), (x2, y2 + 25), draw_color, cv2.FILLED)

            if x1 > 1050:
                # Right sidebar buttons (the 260-385 slot has no action).
                if 125 < y1 < 250:
                    canvas = np.zeros((hCam, wCam, 3), np.uint8)
                elif 385 < y1 < 510:
                    mode = "Drawing Mode"
                elif 510 < y1 < 635:
                    mode = "Prediction Mode"

        if mode == "Prediction Mode" and left_bar is not None:
            img[125:425, 0:226] = left_bar

        # Drawing works the same way in both modes.
        if drawing:
            if xp == 0 and yp == 0:
                xp, yp = x1, y1
            _draw_stroke(img, canvas, (xp, yp), (x1, y1), draw_color)
            xp, yp = x1, y1

        if mode == "Prediction Mode" and index_up and x1 < 300:
            if 150 < y1 < 300:
                cv2.imwrite("Output/Letter.png", canvas)
                predicted_letter, _ = predict_from_image("Output/Letter.png")
                cv2.putText(img, f'Predicted Letter: {predicted_letter}', (50, 500),
                            cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 2)
            elif 315 < y1 < 405:
                cv2.imwrite("Output/Word.png", canvas)
                predictor = PredictWord("Output/Word.png")
                result = predictor.predict()
                print("Detected word:", result)
                PredictWord.save_and_speak_word(result, output_dir='output', filename='output.txt')
                # Start the next word on a clean canvas.
                canvas = np.zeros((hCam, wCam, 3), np.uint8)

        # Merge canvas into the frame: punch out the stroke pixels, then OR
        # the coloured strokes in. Black (eraser) pixels leave the frame as is.
        canvas_gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
        _, stroke_mask = cv2.threshold(canvas_gray, 50, 255, cv2.THRESH_BINARY_INV)
        stroke_mask = cv2.cvtColor(stroke_mask, cv2.COLOR_GRAY2BGR)
        img = cv2.bitwise_and(img, stroke_mask)
        img = cv2.bitwise_or(img, canvas)

        now = time.time()
        elapsed = now - last_frame_time
        # No rate for the first frame; also avoid dividing by a zero interval.
        fps = 1 / elapsed if last_frame_time != 0 and elapsed > 0 else 0
        last_frame_time = now

        if header is not None:
            img[0:125, 0:1280] = header
        if right_bar is not None:
            img[125:720, 1050:1280] = right_bar

        cv2.putText(img, f"Mode : {mode}", (1065, 645), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 0, 255), 1)
        cv2.putText(img, f'FPS: {int(fps)}', (1095, 695), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 1)
        cv2.imshow("Canvas", canvas)
        cv2.imshow("Image", img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
|
|
|
|
|
|
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always free the webcam and close the preview windows, even when
        # main() exits through an exception.
        cam.release()
        cv2.destroyAllWindows()