I have trained two YOLOv5 models (best.pt), one for segmentation and one for object detection, each on a different dataset. I want to create an Egyptian glyph detection system that runs locally in PyCharm. How do I actually use these models? Is there a step-by-step tutorial?
import torch
from PIL import Image
import numpy as np
import cv2
# Location of the custom-trained YOLOv5 checkpoint on the local disk.
_WEIGHTS_PATH = r'C:\Users\user\PycharmProjects\Segmentation\hieroglyphics_recognition\models\segmentbest.pt'

# Build the model once at import time through torch.hub, using the
# 'custom' entry point of the ultralytics/yolov5 repository.
model = torch.hub.load('ultralytics/yolov5', 'custom', path=_WEIGHTS_PATH)
def letterbox_image(image, new_shape=(640, 640), color=(114, 114, 114)):
    """Scale an image to fit ``new_shape`` and pad it with a constant colour.

    The image is resized with a single scale factor (aspect ratio kept) so
    that it fits inside ``new_shape``. The leftover space on each axis is
    reduced modulo 32 and that remainder is split between the two opposite
    sides as a border.

    Args:
        image: Array whose first two dimensions are (height, width).
        new_shape: Target (height, width) used to compute the scale factor.
        color: Border fill value handed to ``cv2.copyMakeBorder``.

    Returns:
        The resized and padded image.
    """
    src_h, src_w = image.shape[:2]
    target_h, target_w = new_shape[0], new_shape[1]

    # One scale for both axes so the aspect ratio is preserved.
    scale = min(target_h / src_h, target_w / src_w)
    scaled_w = int(src_w * scale)
    scaled_h = int(src_h * scale)

    # Only the remainder modulo 32 is padded, half on each side.
    pad_w = np.mod(target_w - scaled_w, 32) / 2
    pad_h = np.mod(target_h - scaled_h, 32) / 2

    # Skip the resize when the image already has the scaled size.
    if (src_w, src_h) != (scaled_w, scaled_h):
        image = cv2.resize(image, (scaled_w, scaled_h), interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 nudges send an odd pixel to the bottom / right side.
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))

    return cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
def load_image(image_path):
    """Read an image file and return it as an RGB NumPy array.

    Args:
        image_path: Path of the image file to open.

    Returns:
        ``numpy.ndarray`` built from the image after conversion to RGB mode.
    """
    # Image.open is lazy and can keep the file open; the context manager
    # releases the handle as soon as the pixel data has been copied out.
    with Image.open(image_path) as image:
        return np.array(image.convert('RGB'))
def predict(image_path):
    """Run the hub-loaded YOLOv5 model on one image and display the detections.

    The image is read with ``load_image`` (RGB array) and passed to the
    module-level ``model``. Every detection is drawn on a copy of the source
    image as a green box with a ``"<class name> <confidence>"`` caption, and
    the annotated picture is opened with ``PIL.Image.show``.

    Args:
        image_path: Path of the image file to run inference on.

    Returns:
        None. The annotated image is shown as a side effect.
    """
    img_array = load_image(image_path)

    # Pass the raw RGB array rather than a pre-built tensor. The YOLOv5 hub
    # wrapper (AutoShape) letterboxes, normalises, applies NMS and rescales
    # boxes to the source image itself; a torch.Tensor input bypasses all of
    # that and yields raw network output without an ``xyxyn`` attribute.
    # NOTE(review): AutoShape is not applied to segmentation checkpoints
    # loaded through torch.hub - confirm the weights are a detection model.
    results = model(img_array, size=640)

    # One row per detection: x1, y1, x2, y2 (normalised 0-1), confidence, class.
    detections = results.xyxyn[0].tolist()

    img = img_array.copy()
    height, width = img.shape[:2]
    for x1n, y1n, x2n, y2n, confidence, class_id in detections:
        x1, y1 = int(x1n * width), int(y1n * height)
        x2, y2 = int(x2n * width), int(y2n * height)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label = f'{model.names[int(class_id)]} {confidence:.2f}'
        # Keep the caption inside the picture when the box touches the top edge.
        cv2.putText(img, label, (x1, max(y1 - 10, 15)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # ``img`` is already RGB because it came from PIL; a BGR->RGB conversion
    # here would swap the red and blue channels in the displayed picture.
    Image.fromarray(img).show()
if __name__ == "__main__":
    # Script entry point: run detection on a single sample picture.
    demo_image = r"C:\Users\user\Downloads\writing-scripts-egyptian-hieroglyph-after-inscription-circa-2700-bc-wood-engraving-19th-century-early-dynastic-period-early-period-early-dynastic-period-of-egypt-second-dynasty-pictogram-pictograph-pictograms-pictog.jpg"
    predict(demo_image)
I tried this piece of code, but it wasn't detecting as accurately as it did in Google Colab.