Setup¶
We begin by importing the necessary libraries and defining a utility class for timing our code block executions.
import time
from IPython.display import display, HTML
def countdown_timer(minutes):
display(HTML(f"""
<p id="timer" style="font-size:300px; color: red;"></p>
<script>
var minutes = {minutes};
var seconds = 0;
var timer = document.getElementById("timer");
function updateTimer() else else
var minutesDisplay = minutes.toString().padStart(2, '0');
var secondsDisplay = seconds.toString().padStart(2, '0');
timer.innerHTML = minutesDisplay + ":" + secondsDisplay;
setTimeout(updateTimer, 1000);
}}
}}
updateTimer();
</script>
"""))
# Run the countdown timer for x minutes
countdown_timer(6)
Imports¶
Loading NumPy, Matplotlib, and OpenCV for image processing.
import cv2
import numpy as np
import matplotlib.pyplot as plt
import scipy.ndimage as ndimage
import ipywidgets as widgets
import scipy.signal, scipy.misc
from ipywidgets import interact, IntSlider, FloatSlider
from matplotlib.animation import FuncAnimation
from tqdm import tqdm
from mpl_toolkits.mplot3d import Axes3D
from skimage import data
from skimage.color import rgb2gray
1. Fourier Transform¶
The Fourier Transform is a fundamental tool in signal processing and computer vision used to analyze the frequency characteristics of an image.
Fourier Transforms (FT) decompose a signal into its constituent frequencies. It converts a time or spatial domain signal into the frequency domain, allowing us to analyze the frequency content of signals. This is especially useful in many fields like signal processing, image processing, and data analysis.
In simple terms, a Fourier Transform breaks down a wave into a sum of sinusoids, each with a specific frequency, amplitude, and phase. For images, high frequencies correspond to sharp transitions (like edges), while low frequencies correspond to smooth, homogeneous regions.
For deeper understanding of how FTs work mathematically, you can refer to these resources:
1.1 Fourier Transforms on 1D Waves¶
Before diving into 2D images, let's look at how the Fourier Transform works on 1D signals (waves). We will create composite waves and see how the FT recovers their fundamental frequencies.
t = np.linspace(0, 1, 1000) # Time vector
freq1, freq2, freq3 = 5, 20, 50 # Frequencies in Hz
# Sum of three sine waves
signal = np.sin(2 * np.pi * freq1 * t) + 0.5 * np.sin(2 * np.pi * freq2 * t) + np.sin(2 * np.pi * freq3 * t)
signal_1 = np.sin(2 * np.pi * freq1 * t)
signal_2 = 0.5 * np.sin(2 * np.pi * freq2 * t)
signal_3 =np.sin(2 * np.pi * freq3 * t)
# Create subplots for each individual wave and the sum
fig, axs = plt.subplots(4, 1, figsize=(10, 10))
# Plot wave1 (5 Hz)
axs[0].plot(t, signal_1, color='r')
axs[0].set_title('5 Hz Sine Wave')
axs[0].set_xlabel('Time [s]')
axs[0].set_ylabel('Amplitude')
axs[0].set_ylim(-1.1, 1.1)
axs[0].grid(True)
# Plot wave2 (20 Hz)
axs[1].plot(t, signal_2, color='g')
axs[1].set_title('20 Hz Sine Wave')
axs[1].set_xlabel('Time [s]')
axs[1].set_ylabel('Amplitude')
axs[1].set_ylim(-1.1, 1.1)
axs[1].grid(True)
# Plot wave3 (50 Hz)
axs[2].plot(t, signal_3, color='b')
axs[2].set_title('50 Hz Sine Wave')
axs[2].set_xlabel('Time [s]')
axs[2].set_ylabel('Amplitude')
axs[2].set_ylim(-1.1, 1.1)
axs[2].grid(True)
# Plot the sum of all waves
axs[3].plot(t, signal, color='k')
axs[3].set_title('Sum of 5 Hz, 20 Hz, and 50 Hz Sine Waves')
axs[3].set_xlabel('Time [s]')
axs[3].set_ylabel('Amplitude')
axs[3].grid(True)
plt.tight_layout()
plt.show()
ft_signal = np.fft.fft(signal)
frequencies = np.fft.fftfreq(len(signal), t[1] - t[0])
# Plot the magnitude of the Fourier Transform
plt.figure(figsize=(10, 4))
plt.plot(frequencies[:len(frequencies)//2], np.abs(ft_signal)[:len(frequencies)//2])
plt.title('Fourier Transform: Frequency Components of the Signal')
plt.xlabel('Frequency [Hz]')
plt.ylabel('Amplitude')
plt.grid(True)
plt.show()
plt.figure(figsize=(10, 4))
plt.plot(frequencies[:len(frequencies)//2], np.abs(ft_signal)[:len(frequencies)//2])
plt.title('Fourier Transform: Frequency Components of the Signal (Truncated to first 60 frequencies)')
plt.xlabel('Frequency [Hz]')
plt.ylabel('Amplitude')
plt.xlim(0, 60)
plt.grid(True)
plt.show()
# Initial frequency and time parameters
t = np.linspace(0, 1, 1000) # Time vector
sampling_rate = len(t)
# Function to compute the signal and its Fourier transform
def compute_signal_and_fft(freq, amplitude):
signal = amplitude * np.sin(2 * np.pi * freq * t) # Sine wave with current frequency
fft_signal = np.fft.fft(signal) # Fourier transform
freqs = np.fft.fftfreq(sampling_rate, t[1] - t[0]) # Frequency domain
return signal, np.abs(fft_signal), freqs
# Function to plot signal and FFT with a given frequency
def plot_signal_and_fft(freq, amplitude):
signal, fft_signal, freqs = compute_signal_and_fft(freq, amplitude)
# Create the figure and two subplots: left for signal, right for Fourier transform
fig, (ax_signal, ax_fft) = plt.subplots(1, 2, figsize=(10, 5))
# Plot the sine wave
ax_signal.plot(t, signal, color='b')
ax_signal.set_title(f'Sine Wave (Frequency = {freq} Hz)')
ax_signal.set_xlabel('Time [s]')
ax_signal.set_ylabel('Amplitude')
ax_signal.set_ylim(-1.1, 1.1)
ax_signal.grid(True)
# Plot the Fourier transform
ax_fft.plot(freqs[:sampling_rate//2], fft_signal[:sampling_rate//2], color='r')
ax_fft.set_title('Fourier Transform')
ax_fft.set_xlabel('Frequency [Hz]')
ax_fft.set_ylabel('Amplitude')
ax_fft.set_xlim(0, 110)
ax_fft.set_ylim(0, 510)
ax_fft.grid(True)
plt.show()
# Create an interactive slider to control frequency
interact(plot_signal_and_fft, freq=IntSlider(min=1, max=100, step=7, value=5), amplitude=FloatSlider(min=0.1, max=1, step=0.1, value=0.5))
interactive(children=(IntSlider(value=5, description='freq', min=1, step=7), FloatSlider(value=0.5, descriptio…
plot_signal_and_fft
def plot_signal_and_fft(freq, amplitude)
<no docstring>
1.2 Fourier Transforms on 2D Images¶
Applying FT to 2D images reveals the spatial frequencies. The center of the transformed image represents low frequencies, while the edges represent high frequencies. This is incredibly useful for filtering, such as blurring (low-pass filter) or edge detection (high-pass filter).
# Function to generate checkerboard image
def generate_checkerboard(n_boxes):
board_size = 256 # Size of the checkerboard
x = np.linspace(0, 1, board_size)
y = np.linspace(0, 1, board_size)
X, Y = np.meshgrid(x, y)
checkerboard = np.sin(2 * np.pi * n_boxes * X) * np.sin(2 * np.pi * n_boxes * Y)
return checkerboard
# Function to compute the Fourier Transform of the image
def compute_fourier_transform(image):
ft_image = np.fft.fftshift(np.fft.fft2(image))
magnitude_spectrum = np.log(np.abs(ft_image) + 1)
return magnitude_spectrum
# Function to plot checkerboard image and its Fourier Transform
def plot_checkerboard_and_ft(n_boxes, rotation_angle):
# Generate the checkerboard and rotate it
checkerboard = generate_checkerboard(n_boxes)
rotated_checkerboard = ndimage.rotate(checkerboard, rotation_angle, reshape=False)
# Compute the Fourier Transform of the rotated checkerboard
ft_checkerboard = compute_fourier_transform(rotated_checkerboard)
# Create the figure and two subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
# Plot the rotated checkerboard image
ax1.imshow(rotated_checkerboard, cmap='gray')
ax1.set_title(f'Checkerboard with {n_boxes*2} Boxes, Rotated by {rotation_angle}°')
ax1.axis('off')
# Plot the Fourier Transform magnitude spectrum
ax2.imshow(ft_checkerboard, cmap='inferno')
ax2.set_title('Fourier Transform (Magnitude Spectrum)')
ax2.axis('off')
plt.show()
# Create interactive sliders to control the number of boxes and the rotation angle
interact(plot_checkerboard_and_ft,
n_boxes=IntSlider(min=2, max=50, step=2, value=6),
rotation_angle=FloatSlider(min=0, max=180, step=5, value=0))
interactive(children=(IntSlider(value=6, description='n_boxes', max=50, min=2, step=2), FloatSlider(value=0.0,…
plot_checkerboard_and_ft
def plot_checkerboard_and_ft(n_boxes, rotation_angle)
<no docstring>
# Load the astronaut image
astronaut_image = data.astronaut()
# Convert the image to grayscale
gray_astronaut = rgb2gray(astronaut_image)
# Mirror the gray_astronaut image
mirrored_astronaut = np.flipud(gray_astronaut)
# Generate X and Y coordinates
x = np.linspace(0, mirrored_astronaut.shape[1], mirrored_astronaut.shape[1])
y = np.linspace(0, mirrored_astronaut.shape[0], mirrored_astronaut.shape[0])
x, y = np.meshgrid(x, y)
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')
# Plot the surface with grayscale values as Z axis
ax.plot_surface(x, y, mirrored_astronaut, cmap='gray', edgecolor='none')
# Set labels
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Grayscale Value')
ax.set_zlim(0, 6)
ax.view_init(elev=30, azim=270)
# Show plot
plt.tight_layout()
plt.show()
# Compute the 2D Fourier Transform directly using fft2
ft_astronaut = np.fft.fft2(gray_astronaut)
# Shift the zero-frequency component to the center
ft_shift = np.fft.fftshift(ft_astronaut)
# Compute the magnitude spectrum (log scale to enhance visibility)
magnitude_spectrum = np.log(np.abs(ft_shift) + 1)
# Now let's do it step by step (row-wise FFT followed by column-wise FFT)
# Step 1: Compute the row-wise 1D FFT
row_fft = np.fft.fft(gray_astronaut, axis=1)
# Step 2: Compute the column-wise 1D FFT on the result of the row-wise FFT
manual_fft = np.fft.fft(row_fft, axis=0)
# Shift the zero-frequency component to the center for manual FFT
manual_shift = np.fft.fftshift(manual_fft)
# Compute the magnitude spectrum for the manually computed FFT
manual_magnitude_spectrum = np.log(np.abs(manual_shift) + 1)
# Plot the images
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6))
# Display the original grayscale image
ax1.imshow(gray_astronaut, cmap='gray')
ax1.set_title('Grayscale Astronaut Image')
ax1.axis('off')
# Display the magnitude spectrum of the regular 2D Fourier Transform
ax2.imshow(magnitude_spectrum, cmap='inferno')
ax2.set_title('Fourier Transform (Magnitude Spectrum)')
ax2.axis('off')
# Display the magnitude spectrum of the manual step-by-step FFT
ax3.imshow(manual_magnitude_spectrum, cmap='inferno')
ax3.set_title('Manual FFT (Row-wise and Column-wise)')
ax3.axis('off')
plt.show()
2. Contour Tracking¶
Contours are continuous curves bounding continuous regions of similar intensity. They are useful tools for shape analysis and object detection and recognition.
cv2.findContours() uses Suzuki's algorithm, also known as the Suzuki-Abe algorithm, which is a contour-following technique based on a border-following method for digital images. The algorithm finds the boundaries of objects and stores them as a series of points ((x, y) coordinates) that make up the contour.
Steps of Suzuki's Algorithm:
Start Point: The algorithm begins from a starting pixel on the image (usually a white pixel in a binary image). It checks the neighboring pixels to identify which ones are part of the same boundary.
Boundary Tracing: The algorithm traces the boundary of an object by following the pixels that define the contour. It moves pixel by pixel along the boundary, recording the coordinates of each pixel.
Hierarchy Representation: The Suzuki algorithm also builds a hierarchy of contours, where it detects nested contours (holes within objects). This is useful when contours overlap or one object is inside another.
The second argument specifies which contours should be retrieved:
- RETR_EXTERNAL: Retrieves only the extreme outer contours (ignores child contours).
- RETR_TREE: Retrieves all contours and reconstructs the full hierarchy of nested contours.
- RETR_CCOMP: Retrieves all contours and organizes them into two levels (external and internal).
- RETR_LIST: Retrieves all contours without establishing any hierarchy.
The third argument specifies how the contour points should be approximated. There are two main types:
CHAIN_APPROX_NONE: The algorithm stores all the boundary points. This means it keeps every pixel that forms the contour, resulting in high accuracy but with many points.
CHAIN_APPROX_SIMPLE: The algorithm compresses horizontal, vertical, and diagonal segments, and only stores the end points of the segments. This reduces the number of points stored, resulting in an approximation that is less memory-intensive but faster.
# Load the coins image
coins_image = data.coins()
# Thresholding the image to binary (for better contour detection)
_, threshold_image = cv2.threshold(coins_image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
# Find contours using cv2.findContours
contours, hierarchy = cv2.findContours(threshold_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Convert grayscale image to BGR for color contour drawing
rgb_image = cv2.cvtColor(coins_image, cv2.COLOR_GRAY2BGR)
# Draw contours in red (BGR = [0, 0, 255])
cv2.drawContours(rgb_image, contours, -1, (255, 0, 0), 2)
# Display original, thresholded, and contour images side by side
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6))
# Original Image
ax1.imshow(coins_image, cmap='gray')
ax1.set_title('Original Image')
ax1.axis('off')
# Thresholded Image
ax2.imshow(threshold_image, cmap='gray')
ax2.set_title('Thresholded Image')
ax2.axis('off')
# Contours on the Image
ax3.imshow(rgb_image)
ax3.set_title('Contours')
ax3.axis('off')
plt.show()
3. RANSAC (RANdom SAmple Consensus)¶
RANSAC is a robust iterative algorithm used to estimate parameters of a mathematical model from a set of observed data that contains outliers.
RANSAC is an algorithm in computer vision, particularly for fitting models to data when the dataset contains outliers.
Use Case of RANSAC in Computer Vision:
- Feature Matching: When matching points between two images (like in stereo vision or panorama stitching), many incorrect matches (outliers) can occur due to noise, occlusions, or repetitive patterns. RANSAC helps filter out the incorrect matches.
- Model Fitting: RANSAC is frequently used for fitting models like lines, planes, or homographies in tasks like object recognition, 3D reconstruction, or image stitching.
- Pose Estimation: In pose estimation, RANSAC is used to estimate the best-fitting pose (translation and rotation) of an object or camera.
How RANSAC Works:
- Define the Model: The algorithm needs a model to fit the data. In the case of line fitting, the model could be the equation of a line. For homography, the model would be a transformation matrix.
- Random Sampling: RANSAC randomly samples a subset of data points (the minimal number required for the model, called inliers).
- Model Estimation: Using the sampled points, RANSAC estimates the model parameters.
- Consensus Step: RANSAC then checks how many other points in the dataset fit this model well (i.e., how many inliers there are).
- Iteration: This process is repeated a fixed number of times, and the model with the most inliers is selected as the final solution.
Homography: Homography is a transformation to map points from one plane to another, preserving straight lines. It is commonly used in image stitching, perspective correction, and camera calibration.
In mathematical terms, a homography is represented by a 3x3 matrix H, which transforms points between two planes in a projective space. The relation between a point p1 in one image and its corresponding point p2 in another image can be expressed as: p2 = H * p1 where H is the homography matrix, and the points are in homogeneous coordinates.
Key Points:
- Inliers are points that fit the model within a certain threshold.
- Outliers are points that do not fit the model and are ignored.
- RANSAC is non-deterministic and relies on a high number of iterations to increase the chance of finding a model that maximizes inliers.
# Generate synthetic data with a line y = 2x + 1
np.random.seed(42)
n_samples = 100
n_outliers = 100
# Generate inliers
X_inliers = np.linspace(-10, 10, n_samples)
y_inliers = 2 * X_inliers + 1 + np.random.normal(size=n_samples)
# Generate outliers
X_outliers = np.random.uniform(-10, 10, n_outliers)
y_outliers = np.random.uniform(-20, 20, n_outliers)
# Combine inliers and outliers
X = np.concatenate([X_inliers, X_outliers])
y = np.concatenate([y_inliers, y_outliers])
# Scatter plot of the data (with outliers)
plt.figure(figsize=(8, 6))
plt.scatter(X, y, color='blue', s=10)
plt.title('Data with Inliers and Outliers')
plt.xlabel('X')
plt.ylabel('y')
plt.show()
Overview of np.linalg.lstsq()
np.linalg.lstsq() is a function in NumPy that solves linear least squares problems. It is used to find the best-fit solution to a system of linear equations when there are more equations than unknowns (overdetermined) or fewer equations than unknowns (underdetermined). This method is commonly applied in linear regression or curve fitting.
How It Works:
The least squares method finds the solution ( x ) that minimizes the residual sum of squares between the observed data ( b ) and the predictions made by the model ( A \cdot x ). In matrix form, the problem can be written as: [ \min_x ||A \cdot x - b||^2 ] Where:
- ( A ) is the matrix of input data (e.g., X-values),
- ( x ) is the vector of unknown parameters (e.g., slope and intercept),
- ( b ) is the vector of observed values (e.g., Y-values).
Function Syntax:
x, residuals, rank, s = np.linalg.lstsq(A, b, rcond=None)
Parameters:
a (M, N) array_like
"Coefficient" matrix.b {(M,), (M, K)} array_like
Ordinate or "dependent variable" values. Ifbis two-dimensional, the least-squares solution is calculated for each of the K columns ofb.rcond float, optional
Cut-off ratio for small singular values ofa. For the purposes of rank determination, singular values are treated as zero if they are smaller thanrcondtimes the largest singular value ofa. The default uses the machine precision times max(M, N). Passing-1will use machine precision.
Returns:
x {(N,), (N, K)} ndarray
Least-squares solution. Ifbis two-dimensional, the solutions are in the K columns ofx.residuals {(1,), (K,), (0,)} ndarray
Sums of squared residuals: Squared Euclidean 2-norm for each column inb - a @ x. If the rank ofais < N or M <= N, this is an empty array. Ifbis 1-dimensional, this is a (1,) shape array. Otherwise, the shape is (K,).rank int
Rank of matrixa.s (min(M, N),) ndarray
Singular values ofa.
# Fit a line using all points
A = np.vstack([X, np.ones(len(X))]).T
m, c = np.linalg.lstsq(A, y, rcond=None)[0] # y = mx + c
print(f"Line fit without handling outliers: y = {m:.2f}x + {c:.2f}")
# Plot the least squares fit (with outliers)
plt.figure(figsize=(8, 6))
plt.scatter(X, y, color='blue', s=10, label='Data Points')
plt.plot(X, m * X + c, color='green', label='Least Squares Fit (With Outliers)', linewidth=2)
plt.title('Line Fit Without Handling Outliers')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
Line fit without handling outliers: y = 0.70x + 1.11
- Randomly Sample: Pick two random points (as a minimal set to fit a line).
- Fit a Line: Compute the equation of the line through the sampled points.
- Measure Consensus: For all other points, calculate how close they are to the line and classify them as inliers or outliers based on a distance threshold.
- Repeat: Repeat the process for a fixed number of iterations, keeping track of the line with the most inliers.
- Return the Best Model: After all iterations, the line with the most inliers is the best estimate.
# RANSAC parameters
n_iterations = 5000
threshold = 1.0
best_inliers_count = 0
best_model = None
ms, cs, samples = [], [], []
# RANSAC loop
for i in tqdm(range(n_iterations)):
# Randomly sample 2 points
sample_indices = np.random.choice(len(X), 2, replace=False)
X_sample, y_sample = X[sample_indices], y[sample_indices]
samples.append(sample_indices)
# Fit a line using the 2 points
A = np.vstack([X_sample, np.ones(2)]).T
model, _, _, _ = np.linalg.lstsq(A, y_sample, rcond=None)
# y = mx + c
m, c = model
ms.append(m)
cs.append(c)
# Count inliers (points close to the line)
y_pred = m * X + c
inliers = np.abs(y - y_pred) < threshold
inliers_count = np.sum(inliers)
# Keep the model with the most inliers
if inliers_count > best_inliers_count:
best_inliers_count = inliers_count
best_model = model
# Extract the best line model (m and c)
m_best, c_best = best_model
print(f"Best model: y = {m_best:.2f}x + {c_best:.2f}, with {best_inliers_count} inliers")
# Plot the data and the best line
plt.figure(figsize=(8, 6))
plt.scatter(X, y, color='blue', s=10, label='Data Points')
plt.plot(X, m_best * X + c_best, color='red', label='RANSAC Best Line')
plt.title('RANSAC - Line Fitting with Inliers and Outliers')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
100%|██████████| 5000/5000 [00:00<00:00, 8217.92it/s]
Best model: y = 2.00x + 1.08, with 76 inliers
4. Hough Transform¶
The Hough Transform is a powerful feature extraction technique used in image analysis to detect simple shapes such as lines, circles, or ellipses.
Image references: FPCV
4.1 Manual Implementation of Hough Line Transform¶
To understand the inner workings, let's implement the Hough transform accumulator array manually.
# Load the image
gray_image = data.brick()
# Display the grayscale image
plt.figure(figsize=(6, 6))
plt.imshow(gray_image, cmap='gray')
plt.title('Grayscale Bricks Image')
plt.axis('off')
plt.show()
# Apply Canny edge detection
edges = cv2.Canny(gray_image, threshold1=100, threshold2=200)
# Display the edge-detected image
plt.figure(figsize=(6, 6))
plt.imshow(edges, cmap='gray')
plt.title('Edge Detection (Canny)')
plt.axis('off')
plt.show()
%%time
# Define the Hough Transform space
# We are using 180 degrees for theta, and the diagonal of the image for rho
hough_space = np.zeros((int(np.sqrt(edges.shape[0]**2 + edges.shape[1]**2)), 180), dtype=np.uint64)
# Create a list of theta values from 0 to 180 degrees (converted to radians)
thetas = np.deg2rad(np.arange(0, 180))
# Get the edge pixel locations
edge_points = np.argwhere(edges)
# Iterate over each edge point
for y, x in edge_points:
for theta_index in range(len(thetas)):
theta = thetas[theta_index]
# Calculate rho using the Hough Transform equation: ρ = x * cos(θ) + y * sin(θ)
rho = int(x * np.cos(theta) + y * np.sin(theta))
hough_space[rho, theta_index] += 1
# Plot the Hough space
plt.figure(figsize=(15, 6))
plt.imshow(hough_space, cmap='viridis', aspect='auto')
plt.title('Hough Space')
plt.xlabel('Theta (degrees)')
plt.ylabel('Rho (distance from origin)')
plt.colorbar(label='Votes')
plt.show()
CPU times: user 25.8 s, sys: 292 ms, total: 26.1 s Wall time: 43.6 s
# Find the indices of the maximum values in the Hough space (peaks)
# Set a threshold for the number of votes
threshold = 150
peaks = np.argwhere(hough_space > threshold)
# Show the peaks on the Hough space
plt.figure(figsize=(10, 6))
plt.imshow(hough_space, cmap='gray', aspect='auto')
plt.scatter(peaks[:, 1], peaks[:, 0], color='red')
plt.title('Hough Space with Peaks')
plt.xlabel('Theta (degrees)')
plt.ylabel('Rho (distance from origin)')
plt.show()
# Create a copy of the original image to draw the lines on
output_image = np.copy(gray_image)
# Iterate over the peaks
for rho, theta_index in peaks:
theta = thetas[theta_index]
# Convert back from polar (rho, theta) to Cartesian coordinates
a = np.cos(theta)
b = np.sin(theta)
x0 = a * rho
y0 = b * rho
# Define two points that form the line
x1 = int(x0 + 1000 * (-b))
y1 = int(y0 + 1000 * (a))
x2 = int(x0 - 1000 * (-b))
y2 = int(y0 - 1000 * (a))
# Draw the line on the image (in red)
cv2.line(output_image, (x1, y1), (x2, y2), (255, 0, 0), 2)
# Plot the original image with detected lines
plt.figure(figsize=(6, 6))
plt.imshow(output_image)
plt.title('Detected Lines on Image')
plt.axis('off')
plt.show()
4.2 Hough Transform using OpenCV¶
OpenCV provides highly optimized functions for Hough Transforms. cv2.HoughLines() returns $(r, \theta)$ parameters for standard Hough Transform, while cv2.HoughLinesP() provides the Probabilistic Hough Transform, returning the endpoints of the detected line segments, which is often more useful in practice.
# Load the image
gray_image = data.brick()
# Apply Canny edge detection
edges = cv2.Canny(gray_image, threshold1=100, threshold2=200)
# Perform the Hough Line Transform using OpenCV's built-in function
lines = cv2.HoughLines(edges, rho=1, theta=np.pi/180, threshold=150)
# Create a copy of the original image to draw the lines on
output_image = np.copy(gray_image)
# Draw the lines on the image
for line in lines:
rho, theta = line[0]
a = np.cos(theta)
b = np.sin(theta)
x0 = a * rho
y0 = b * rho
x1 = int(x0 + 1000 * (-b))
y1 = int(y0 + 1000 * (a))
x2 = int(x0 - 1000 * (-b))
y2 = int(y0 - 1000 * (a))
# Draw the lines
cv2.line(output_image, (x1, y1), (x2, y2), (255, 0, 0), 2)
# Plot the original image and the image with detected lines
plt.figure(figsize=(10, 5))
# Original image
plt.subplot(1, 2, 1)
plt.imshow(gray_image, cmap='gray')
plt.title('Original Image')
plt.axis('off')
# Image with Hough lines
plt.subplot(1, 2, 2)
plt.imshow(output_image)
plt.title('Hough Lines Detected')
plt.axis('off')
plt.show()