上手
卷积神经网络
首先,这是一个 Canny 边缘检测的例子,它能非常清晰地提取出画面中的边缘。
重点是:窗口里有两个滑动条,你可以通过调整它们的阈值,来观察哪些地方的边缘会更明显。
import cv2
# Trackbar callback: OpenCV requires one, but this script does nothing in it.
def nothing(x):
    """Ignore the trackbar position passed in by ``cv2.createTrackbar``.

    Args:
        x: New slider position supplied by OpenCV; unused.

    Returns:
        None.
    """
    return None
# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)

# Everything after acquiring the camera runs inside try/finally so the device
# and the GUI windows are released even if an exception or Ctrl-C interrupts
# the loop.
try:
    # Window that shows the edge image and hosts the two sliders.
    cv2.namedWindow('Canny Edges')

    # Two sliders (range 0-255) that control the two Canny thresholds.
    cv2.createTrackbar('Min Threshold', 'Canny Edges', 0, 255, nothing)
    cv2.createTrackbar('Max Threshold', 'Canny Edges', 0, 255, nothing)

    # Initial slider positions.
    cv2.setTrackbarPos('Min Threshold', 'Canny Edges', 100)
    cv2.setTrackbarPos('Max Threshold', 'Canny Edges', 200)

    while True:
        # Grab one frame from the camera; stop if none could be read.
        ret, frame = cap.read()
        if not ret:
            print("无法读取摄像头帧")
            break

        # Canny is applied to a single-channel image, so convert to grayscale.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Read the current thresholds from the sliders on every frame.
        min_val = cv2.getTrackbarPos('Min Threshold', 'Canny Edges')
        max_val = cv2.getTrackbarPos('Max Threshold', 'Canny Edges')

        # Run Canny edge detection with the current thresholds.
        edges = cv2.Canny(gray, min_val, max_val)

        # Show the raw frame and the edge map side by side in two windows.
        cv2.imshow('Original Frame', frame)
        cv2.imshow('Canny Edges', edges)

        # Quit when 'q' is pressed (waitKey also pumps the GUI event loop).
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close all windows.
    cap.release()
    cv2.destroyAllWindows()
但是,为什么要学会这个呢?
原因是...
在1950年代末到1960年代,神经科学家David H. Hubel和Torsten Wiesel研究了猫和猴子的视觉皮层(V1区)对视觉信号的反应(两人因此获得了1981年的诺贝尔生理学或医学奖)。他们发现,视觉皮层中的神经元会响应图像中的某些特定的模式,比如边缘、方向和运动等。
Yann LeCun在1990年代提出了基于分层结构的卷积神经网络,用于图像识别任务。最早的CNN模型(如LeNet-5)就成功应用于手写数字识别(如MNIST数据集)。
那么就尝试一下最简单的卷积神经网络
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
# Hyperparameters.
batch_size = 64
learning_rate = 0.001
num_epochs = 5

# Preprocessing: convert each image to a tensor, then normalize it with
# mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST training and test splits, both stored under ./data. Only the
# training split is constructed with download=True.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
)

# Batched loaders: the training data is shuffled, the test data is not.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)
# Convolutional neural network definition.
class CNN(nn.Module):
    """Small CNN: two conv + ReLU + max-pool stages, then two linear layers.

    Both convolutions use a 5x5 kernel with stride 1 and padding 2, so they
    keep the spatial size; each 2x2 max-pool halves it. ``fc1`` expects
    ``32 * 7 * 7`` features, which corresponds to single-channel 28x28 inputs.
    The network returns 10 raw scores (logits) per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(32 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape ``(N, 1, 28, 28)``. A single unbatched image of
                shape ``(1, 28, 28)`` is accepted and treated as a batch of one.

        Returns:
            Tensor of shape ``(N, 10)`` holding unnormalized class scores.

        Raises:
            RuntimeError: If the spatial size does not produce the
                ``32 * 7 * 7`` features that ``fc1`` expects.
        """
        # Treat a lone (C, H, W) image as a batch of one so the output is
        # always 2-D.
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, 32 * 7 * 7), this keeps the batch
        # dimension fixed, so a wrongly sized input fails loudly at fc1
        # instead of being silently reshaped into extra batch rows.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)
# Initialize the model, the loss function, and the optimizer.
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model.
for epoch in range(num_epochs):
    model.train()
    for images, labels in train_loader:
        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Printed once per epoch; the value is the loss of the last mini-batch
    # of that epoch, not an average over the epoch.
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model on the test set.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the model on the test images: {100 * correct / total:.2f}%')
Last updated