# Virtual_Board/main.py
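# Air-drawing virtual board: tracks a hand with MediaPipe, draws on a canvas with
# the index finger, switches tools with an index+middle "selection" gesture, and
# can run letter / word prediction on the drawn strokes.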
import os
import time
import numpy as np
import cv2
import mediapipe as mp
from prediction import predict_from_image
from PredictWord import PredictWord, clear_notepad_file
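# `prediction` and `PredictWord` are companion modules of this project that run
# letter and word recognition on images saved from the drawing canvas.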
Header_path = "Assets/header"   # toolbar images shown across the top of the frame
myList = os.listdir(Header_path)
cam = cv2.VideoCapture(0)
wCam, hCam = 1280, 720          # working frame size for the board
class HandDetector:
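    """Thin wrapper around MediaPipe Hands: detects hands in a BGR frame, exposes
    landmark pixel coordinates, and reports which fingers are raised."""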
    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.8, trackCon=0.8):
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplexity = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]  # landmark ids of the fingertips: thumb, index, middle, ring, pinky
        self.lmList = []
    def findHands(self, img):
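        """Run hand detection on a BGR frame and draw any detected landmarks onto it."""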
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB input
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img
    def findPosition(self, img, handNo=0):
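        """Return [id, x, y] pixel coordinates for each landmark of hand `handNo` (empty list if no hand)."""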
        self.lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            h, w, c = img.shape
            for id, lm in enumerate(myHand.landmark):
                # Convert normalized landmark coordinates to pixel coordinates
                cx, cy = int(lm.x * w), int(lm.y * h)
                self.lmList.append([id, cx, cy])
        return self.lmList
    def fingerup(self):
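        """Return five 0/1 flags (thumb, index, middle, ring, pinky) indicating which fingers are up."""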
        fingers = []
        # Thumb: compare x of the tip (landmark 4) with the joint below it (landmark 3);
        # a simple heuristic that depends on hand orientation in the mirrored frame
        if self.lmList[self.tipIds[0]][1] < self.lmList[self.tipIds[0] - 1][1]:
            fingers.append(1)
        else:
            fingers.append(0)
        # Other fingers: fingertip above its PIP joint (two landmarks back) counts as raised
        for id in range(1, 5):
            if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
                fingers.append(1)
            else:
                fingers.append(0)
        return fingers
def main():
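    """Capture webcam frames, track the index finger, and drive the drawing / prediction UI."""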
    detector = HandDetector()
    cTime = 0
    overlayList = []
    drawColor = (0, 0, 255)  # BGR: red pen by default
    for impath in myList:
        image = cv2.imread(f'{Header_path}/{impath}')
        if image is not None:
            overlayList.append(image)
    header = None
    if overlayList:
        header = cv2.resize(overlayList[0], (1280, 125)) if overlayList[0].shape != (125, 1280, 3) else overlayList[0]
    RightBar = cv2.imread('Assets/sidebar/right.png')
    if RightBar is not None:
        RightBar = cv2.resize(RightBar, (230, 595))
    LeftBar = cv2.imread('Assets/sidebar/left.png')
    if LeftBar is not None:
        LeftBar = cv2.resize(LeftBar, (226, 300))
mode = "Drawing Mode"
canvas = np.zeros((720, 1280, 3), np.uint8)
submode = "Letter_Prediction"
predicted_letter = ""
clear_notepad_file(output_dir='output', filename='output.txt')
xp, yp = 0, 0
    while True:
        success, img = cam.read()
        if not success:
            break  # camera frame not available
        img = cv2.resize(img, (wCam, hCam))
        img = cv2.flip(img, 1)  # mirror the frame so strokes follow the hand naturally
        img = detector.findHands(img)
        lmlist = detector.findPosition(img)
        # Only process gestures if hand landmarks were detected
        if len(lmlist) != 0:
            x1, y1 = lmlist[8][1:3]   # index fingertip
            x2, y2 = lmlist[12][1:3]  # middle fingertip
            fingers = detector.fingerup()
            # Selection Mode: both index and middle fingers up
            if fingers[1] == 1 and fingers[2] == 1:
                xp, yp = 0, 0  # break the current stroke so re-entering drawing mode starts fresh
                # Header (top 125 px): pick a tool by hovering over its button
                if y1 < 125 and len(overlayList) >= 2:
                    if 0 < x1 < 271:        # pen button
                        drawColor = (0, 0, 255)
                        header = cv2.resize(overlayList[0], (1280, 125))
                    elif 850 < x1 < 1280:   # eraser button
                        drawColor = (0, 0, 0)
                        header = cv2.resize(overlayList[1], (1280, 125))
                cv2.rectangle(img, (x1, y1 - 25), (x2, y2 + 25), drawColor, cv2.FILLED)
                # Right sidebar (x > 1050): buttons stacked in vertical bands
                if x1 > 1050:
                    if 125 < y1 < 250:
                        canvas = np.zeros((720, 1280, 3), np.uint8)  # clear canvas
                    if 260 < y1 < 385:
                        pass  # reserved button slot
                    if 385 < y1 < 510:
                        mode = "Drawing Mode"
                    if 510 < y1 < 635:
                        mode = "Prediction Mode"
            # Drawing Mode: only the index finger up
            if fingers[1] and not fingers[2] and mode == "Drawing Mode":
                if xp == 0 and yp == 0:
                    xp, yp = x1, y1          # start a new stroke at the current fingertip
                if drawColor == (0, 0, 0):   # eraser: thick black stroke
                    cv2.circle(img, (x1, y1), 30, drawColor, cv2.FILLED)
                    cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 75)
                else:                        # pen: thin colored stroke
                    cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 15)
                xp, yp = x1, y1              # remember this point for the next segment
if mode == "Prediction Mode":
if LeftBar is not None:
img[125:425, 0:226] = LeftBar
                # Same stroke logic as Drawing Mode, so the user can write inside Prediction Mode
                if fingers[1] and not fingers[2]:
                    if xp == 0 and yp == 0:
                        xp, yp = x1, y1
                    if drawColor == (0, 0, 0):
                        cv2.circle(img, (x1, y1), 30, drawColor, cv2.FILLED)
                        cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 75)
                    else:
                        cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 15)
                    xp, yp = x1, y1
                # Left sidebar (x < 300): prediction buttons; these fire on every
                # frame the fingertip stays inside the region
                if x1 < 300:
                    if 150 < y1 < 300:
                        submode = "Letter Prediction"
                        cv2.imwrite("Output/Letter.png", canvas)
                        predicted_letter, confidence = predict_from_image("Output/Letter.png")
                        cv2.putText(img, f'Predicted Letter: {predicted_letter}', (50, 500),
                                    cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 2)
                        prediction_time = time.time()
                        reset_canvas = True
                    if 315 < y1 < 405:
                        submode = "Word Prediction"
                        cv2.imwrite("Output/Word.png", canvas)
                        predictor = PredictWord("Output/Word.png")
                        result = predictor.predict()
                        print("Detected word:", result)
                        PredictWord.save_and_speak_word(result, output_dir='output', filename='output.txt')
                        canvas = np.zeros((720, 1280, 3), np.uint8)  # start fresh after a word prediction
                    # TODO: move this reset outside the `x1 < 300` block so it runs every
                    # frame and clears the canvas ~5 s after a letter prediction:
                    # if reset_canvas and prediction_time is not None:
                    #     if time.time() - prediction_time > 5:
                    #         canvas = np.zeros((720, 1280, 3), np.uint8)
                    #         reset_canvas = False
                    #         prediction_time = None
        # Composite the canvas onto the camera frame: the inverted stroke mask
        # punches black holes into the frame (bitwise_and), then the colored
        # strokes are pasted into those holes (bitwise_or).
        imgGray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
        _, imgInv = cv2.threshold(imgGray, 50, 255, cv2.THRESH_BINARY_INV)
        imgInv = cv2.cvtColor(imgInv, cv2.COLOR_GRAY2BGR)
        img = cv2.bitwise_and(img, imgInv)
        img = cv2.bitwise_or(img, canvas)
        # Frames per second, measured between consecutive loop iterations
        pTime = time.time()
        fps = 1 / (pTime - cTime) if cTime != 0 else 0
        cTime = pTime
        # Overlay header and RightBar only if they were loaded successfully
        if header is not None:
            img[0:125, 0:1280] = header
        if RightBar is not None:
            img[125:720, 1050:1280] = RightBar
        cv2.putText(img, f"Mode : {mode}", (1065, 645), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 0, 255), 1)
        cv2.putText(img, f'FPS: {int(fps)}', (1095, 695), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 1)
        cv2.imshow("Canvas", canvas)
        cv2.imshow("Image", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
if __name__ == "__main__":
    main()
    cam.release()
    cv2.destroyAllWindows()