"""Webcam "air canvas": draw with hand gestures and run handwriting prediction.

A MediaPipe hand tracker locates the index and middle fingertips in each
webcam frame.  Two raised fingers act as a selection pointer (header colour
picker and right-hand sidebar); a raised index finger alone draws on an
off-screen canvas that is blended over the camera image.  In "Prediction Mode"
the canvas can be saved and passed to the project's letter / word predictors.
"""
import os
import time

import numpy as np
import cv2
import mediapipe as mp

from prediction import predict_from_image
from PredictWord import PredictWord, clear_notepad_file

Header_path = "Assets/header"
myList = os.listdir(Header_path)
cam = cv2.VideoCapture(0)
wCam, hCam = 1280, 720

# Layout constants shared by the drawing loop (all in pixels).
_HEADER_SIZE = (1280, 125)        # (width, height) of the header strip
_RIGHT_BAR_SIZE = (230, 595)      # (width, height) of the right sidebar
_LEFT_BAR_SIZE = (226, 300)       # (width, height) of the left sidebar
_BRUSH_COLOR = (0, 0, 255)
_ERASER_COLOR = (0, 0, 0)         # black doubles as "erase" on the canvas
_BRUSH_THICKNESS = 15
_ERASER_THICKNESS = 75
_ERASER_CURSOR_RADIUS = 30
_NO_POINT = (0, 0)                # sentinel: no stroke currently in progress
_IMAGE_OUTPUT_DIR = "Output"


class HandDetector:
    """Wrap MediaPipe Hands and expose pixel-space landmarks and finger states."""

    def __init__(self, mode=False, maxHands=2, modelComplexity=1,
                 detectionCon=0.8, trackCon=0.8):
        """Create the underlying MediaPipe ``Hands`` solution.

        Args:
            mode: Forwarded as ``static_image_mode``.
            maxHands: Forwarded as ``max_num_hands``.
            modelComplexity: Forwarded as ``model_complexity``.
            detectionCon: Forwarded as ``min_detection_confidence``.
            trackCon: Forwarded as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplexity = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        self.mpHands = mp.solutions.hands
        # Keyword arguments keep the configuration correct even if the
        # positional order of Hands() differs between MediaPipe releases.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark ids used by fingerup(), one per finger starting at the thumb.
        self.tipIds = [4, 8, 12, 16, 20]
        self.lmList = []
        # Populated by findHands(); None until the first frame is processed.
        self.results = None

    def findHands(self, img):
        """Run hand detection on a BGR frame and draw any landmarks onto it.

        Args:
            img: BGR image; it is drawn on in place.

        Returns:
            The same image object, with hand connections drawn when found.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms,
                                           self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0):
        """Convert one detected hand's landmarks to pixel coordinates.

        Args:
            img: Image whose height and width scale the normalised landmarks.
            handNo: Index of the detected hand to read.

        Returns:
            A list of ``[landmark_id, x_px, y_px]`` entries.  The list is empty
            when findHands() has not run yet, no hand was detected, or
            ``handNo`` does not refer to a detected hand.
        """
        self.lmList = []
        results = getattr(self, "results", None)
        hands = getattr(results, "multi_hand_landmarks", None)
        if hands and -len(hands) <= handNo < len(hands):
            # Only height and width are needed, so any channel count works.
            h, w = img.shape[:2]
            for lm_id, lm in enumerate(hands[handNo].landmark):
                self.lmList.append([lm_id, int(lm.x * w), int(lm.y * h)])
        return self.lmList

    def fingerup(self):
        """Report which fingers are raised, based on the last findPosition().

        Returns:
            Five 0/1 flags ordered thumb, index, middle, ring, pinky, or an
            empty list when the stored landmark list is too short to index.
        """
        if len(self.lmList) <= max(self.tipIds):
            return []

        # Thumb: compares the x coordinate of the tip with the landmark just
        # before it.  NOTE(review): this direction presumably suits one hand
        # orientation in the mirrored frame only - confirm.
        thumb_tip = self.tipIds[0]
        fingers = [
            1 if self.lmList[thumb_tip][1] < self.lmList[thumb_tip - 1][1] else 0
        ]

        # Other fingers: raised when the tip is above (smaller y than) the
        # landmark two positions earlier.
        for tip in self.tipIds[1:]:
            fingers.append(
                1 if self.lmList[tip][2] < self.lmList[tip - 2][2] else 0
            )
        return fingers


def _load_resized(path, size):
    """Read an image and resize it to ``size`` (width, height); None if unreadable."""
    image = cv2.imread(path)
    if image is None:
        return None
    return cv2.resize(image, size)


def _blank_canvas():
    """Return a new all-black drawing canvas matching the camera frame size."""
    return np.zeros((hCam, wCam, 3), np.uint8)


def _draw_stroke(img, canvas, prev_point, point, color):
    """Draw one stroke segment on ``canvas`` and return the new previous point.

    When no stroke is in progress (``prev_point`` is the sentinel) the segment
    starts at ``point`` itself, so no line is drawn from the frame corner.
    The eraser uses a thicker line and shows a cursor circle on ``img``.
    """
    if prev_point == _NO_POINT:
        prev_point = point
    if color == _ERASER_COLOR:
        cv2.circle(img, point, _ERASER_CURSOR_RADIUS, color, cv2.FILLED)
        cv2.line(canvas, prev_point, point, color, _ERASER_THICKNESS)
    else:
        cv2.line(canvas, prev_point, point, color, _BRUSH_THICKNESS)
    return point


def main():
    """Run the capture / gesture / drawing loop until 'q' is pressed.

    The loop also ends if the camera stops delivering frames.
    """
    detector = HandDetector()
    cTime = 0
    drawColor = _BRUSH_COLOR

    # Sort the names so that overlay 0 (brush) and overlay 1 (eraser) do not
    # depend on the arbitrary order returned by os.listdir().
    overlayList = []
    for impath in sorted(myList):
        overlay = _load_resized(f'{Header_path}/{impath}', _HEADER_SIZE)
        if overlay is not None:
            overlayList.append(overlay)
    header = overlayList[0] if overlayList else None

    # Missing sidebar assets yield None here and are simply not drawn later.
    RightBar = _load_resized('Assets/sidebar/right.png', _RIGHT_BAR_SIZE)
    LeftBar = _load_resized('Assets/sidebar/left.png', _LEFT_BAR_SIZE)

    mode = "Drawing Mode"
    canvas = _blank_canvas()
    clear_notepad_file(output_dir='output', filename='output.txt')
    # cv2.imwrite() fails silently if the target directory is missing.
    os.makedirs(_IMAGE_OUTPUT_DIR, exist_ok=True)
    prev_point = _NO_POINT

    while True:
        success, img = cam.read()
        if not success or img is None:
            print("Camera frame could not be read; stopping.")
            break
        img = cv2.resize(img, (wCam, hCam))
        img = cv2.flip(img, 1)
        img = detector.findHands(img)
        lmlist = detector.findPosition(img)

        hand_visible = bool(lmlist)
        index_only = False
        if hand_visible:
            x1, y1 = lmlist[8][1:3]    # index fingertip
            x2, y2 = lmlist[12][1:3]   # middle fingertip
            fingers = detector.fingerup()
            index_up = len(fingers) >= 3 and fingers[1] == 1
            middle_up = len(fingers) >= 3 and fingers[2] == 1
            index_only = index_up and not middle_up

            # Selection Mode: both index and middle finger up
            if index_up and middle_up:
                prev_point = _NO_POINT  # lifting the pen ends the stroke
                if y1 < 125 and len(overlayList) >= 2:
                    if 0 < x1 < 271:
                        drawColor = _BRUSH_COLOR
                        header = overlayList[0]
                    elif 850 < x1 < 1280:
                        drawColor = _ERASER_COLOR
                        header = overlayList[1]
                cv2.rectangle(img, (x1, y1 - 25), (x2, y2 + 25),
                              drawColor, cv2.FILLED)

                # Rightbar actions
                if x1 > 1050:
                    if 125 < y1 < 250:
                        canvas = _blank_canvas()  # Clear canvas
                    # 260 < y1 < 385 is a sidebar slot with no action.
                    if 385 < y1 < 510:
                        mode = "Drawing Mode"
                    if 510 < y1 < 635:
                        mode = "Prediction Mode"

            # Drawing Mode: only index finger up
            if index_only and mode == "Drawing Mode":
                prev_point = _draw_stroke(img, canvas, prev_point,
                                          (x1, y1), drawColor)

        if mode == "Prediction Mode":
            if LeftBar is not None:
                img[125:425, 0:226] = LeftBar

            if hand_visible:
                if index_only:
                    prev_point = _draw_stroke(img, canvas, prev_point,
                                              (x1, y1), drawColor)

                # Leftbar actions
                if x1 < 300:
                    if 150 < y1 < 300:
                        cv2.imwrite("Output/Letter.png", canvas)
                        predicted_letter, _ = predict_from_image(
                            "Output/Letter.png")
                        cv2.putText(img,
                                    f'Predicted Letter: {predicted_letter}',
                                    (50, 500), cv2.FONT_HERSHEY_TRIPLEX, 1,
                                    (255, 0, 255), 2)
                        # TODO: the canvas is not cleared after a letter
                        # prediction; a delayed reset was sketched but never
                        # enabled.
                    if 315 < y1 < 405:
                        cv2.imwrite("Output/Word.png", canvas)
                        predictor = PredictWord("Output/Word.png")
                        result = predictor.predict()
                        print("Detected word:", result)
                        PredictWord.save_and_speak_word(
                            result, output_dir='output', filename='output.txt')
                        canvas = _blank_canvas()

        # Combine canvas and camera image using bitwise operations: punch the
        # drawn pixels out of the frame, then OR the coloured strokes back in.
        imgGray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
        _, imgInv = cv2.threshold(imgGray, 50, 255, cv2.THRESH_BINARY_INV)
        imgInv = cv2.cvtColor(imgInv, cv2.COLOR_GRAY2BGR)
        img = cv2.bitwise_and(img, imgInv)
        img = cv2.bitwise_or(img, canvas)

        # Calculate FPS (frames per second); guard against a zero interval on
        # clocks with coarse resolution.
        pTime = time.time()
        elapsed = pTime - cTime
        fps = 1 / elapsed if cTime != 0 and elapsed > 0 else 0
        cTime = pTime

        # Overlay header and RightBar only if they are loaded (robustness)
        if header is not None:
            img[0:125, 0:1280] = header
        if RightBar is not None:
            img[125:720, 1050:1280] = RightBar

        cv2.putText(img, f"Mode : {mode}", (1065, 645),
                    cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 0, 255), 1)
        cv2.putText(img, f'FPS: {int(fps)}', (1095, 695),
                    cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 1)
        cv2.imshow("Canvas", canvas)
        cv2.imshow("Image", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break


if __name__ == "__main__":
    try:
        main()
    finally:
        # Release the camera and close windows even if main() raised.
        cam.release()
        cv2.destroyAllWindows()