
I have trained two YOLOv5 models (best.pt), one for segmentation and one for object detection, on two different datasets. I want to build an Egyptian glyph detection system locally in PyCharm. How do I actually use these models? Is there a step-by-step tutorial?
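
From what I can tell, both best.pt files should load the same way through torch.hub as custom weights, so I assume the starting point is something like this (the detection path is just a placeholder for my second model, and I'm not sure whether the segmentation weights behave any differently when loaded this way):

import torch

# Assumption: both checkpoints load as custom YOLOv5 weights via torch.hub.
seg_model = torch.hub.load('ultralytics/yolov5', 'custom',
                           path=r'C:\Users\user\PycharmProjects\Segmentation\hieroglyphics_recognition\models\segmentbest.pt')
det_model = torch.hub.load('ultralytics/yolov5', 'custom',
                           path=r'C:\path\to\detection\best.pt')  # placeholder path for my detection model

Below is the full script I have tried so far with the segmentation model: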

import torch
from PIL import Image
import numpy as np
import cv2

# Load the YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'custom',
                       path=r'C:\Users\user\PycharmProjects\Segmentation\hieroglyphics_recognition\models\segmentbest.pt')


def letterbox_image(image, new_shape=(640, 640), color=(114, 114, 114)):
    # Letterbox: resize keeping aspect ratio, then pad to a 32-pixel multiple
    shape = image.shape[:2]  # current shape [height, width]
    ratio = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    new_unpad = (int(shape[1] * ratio), int(shape[0] * ratio))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
    dw, dh = np.mod(dw, 32) / 2, np.mod(dh, 32) / 2  # divide padding into 2 sides

    if shape[::-1] != new_unpad:  # resize
        image = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return image


def load_image(image_path):
    # Load image using PIL and convert to NumPy array
    image = Image.open(image_path).convert('RGB')
    img_array = np.array(image)
    return img_array


def predict(image_path):
    # Load and prepare the image
    img_array = load_image(image_path)

    # Resize and pad the image
    img_array = letterbox_image(img_array, new_shape=(640, 640))

    # Convert HWC uint8 image to a normalized [1, C, H, W] float tensor
    img_tensor = torch.from_numpy(img_array).float().div(255.0).permute(2, 0, 1).unsqueeze(0)

    # Perform prediction
    results = model(img_tensor)

    # Process results and draw bounding boxes
    labels, cord = results.xyxyn[0][:, -1], results.xyxyn[0][:, :-1]
    img = img_array.copy()
    n = len(labels)

    for i in range(n):
        box = cord[i]
        x1, y1 = int(box[0] * img.shape[1]), int(box[1] * img.shape[0])
        x2, y2 = int(box[2] * img.shape[1]), int(box[3] * img.shape[0])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label = f'{model.names[int(labels[i])]} {results.xyxyn[0][i, -2]:.2f}'
        cv2.putText(img, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # img was loaded via PIL as RGB, so no BGR->RGB conversion is needed before display
    img = Image.fromarray(img)
    img.show()


if __name__ == "__main__":
    image_path = r"C:\Users\user\Downloads\writing-scripts-egyptian-hieroglyph-after-inscription-circa-2700-bc-wood-engraving-19th-century-early-dynastic-period-early-period-early-dynastic-period-of-egypt-second-dynasty-pictogram-pictograph-pictograms-pictog.jpg"
    predict(image_path)

I did try this piece of code, but it wasn't detecting as accurately as it did in Google Colab.
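
One thing I am not sure about is whether the manual letterbox/tensor step is even needed, or whether I should just pass the raw image to the hub-loaded model and let it do its own preprocessing and NMS, roughly like this (just a guess on my part, not verified):

import torch
from PIL import Image

model = torch.hub.load('ultralytics/yolov5', 'custom',
                       path=r'C:\Users\user\PycharmProjects\Segmentation\hieroglyphics_recognition\models\segmentbest.pt')

# Guess: hand the PIL image straight to the model so its built-in
# pre/post-processing handles resizing, normalization and NMS.
image_path = r"C:\Users\user\Downloads\writing-scripts-egyptian-hieroglyph-after-inscription-circa-2700-bc-wood-engraving-19th-century-early-dynastic-period-early-period-early-dynastic-period-of-egypt-second-dynasty-pictogram-pictograph-pictograms-pictog.jpg"
results = model(Image.open(image_path))
results.print()                  # console summary of detections
results.show()                   # image with boxes drawn by YOLOv5 itself
print(results.pandas().xyxy[0])  # detections as a pandas DataFrame

Is that the intended usage, or is the manual preprocessing above the right approach?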
