diff --git a/assignment5/a5_utils.py b/assignment5/a5_utils.py
new file mode 100644
index 0000000..73224e6
--- /dev/null
+++ b/assignment5/a5_utils.py
@@ -0,0 +1,29 @@
+import numpy as np
+import cv2
+from matplotlib import pyplot as plt
+
+def normalize_points(P):
+    # P must be a Nx2 matrix of points
+    # first coordinate is x, second is y
+
+    # returns: normalized points in homogeneous coordinates and 3x3 transformation matrix
+
+    mu = np.mean(P, axis=0) # mean
+    scale = np.sqrt(2) / np.mean(np.sqrt(np.sum((P-mu)**2,axis=1))) # scale
+    T = np.array([[scale, 0, -mu[0]*scale],[0, scale, -mu[1]*scale],[0,0,1]]) # transformation matrix
+    P = np.hstack((P,np.ones((P.shape[0],1)))) # homogeneous coordinates
+    res = np.dot(T,P.T).T
+    return res, T
+
+def draw_epiline(l,h,w):
+    # l: line equation (vector of size 3)
+    # h: image height
+    # w: image width
+
+    x0, y0 = map(int, [0, -l[2]/l[1]])
+    x1, y1 = map(int, [w-1, -(l[2]+l[0]*w)/l[1]])
+
+    plt.plot([x0,x1],[y0,y1],'r')
+
+    plt.ylim([0,h])
+    plt.gca().invert_yaxis()
\ No newline at end of file
diff --git a/assignment5/data/desk/DSC02638.JPG b/assignment5/data/desk/DSC02638.JPG
new file mode 100644
index 0000000..bb9a16c
Binary files /dev/null and b/assignment5/data/desk/DSC02638.JPG differ
diff --git a/assignment5/data/desk/DSC02639.JPG b/assignment5/data/desk/DSC02639.JPG
new file mode 100644
index 0000000..b0dd3c0
Binary files /dev/null and b/assignment5/data/desk/DSC02639.JPG differ
diff --git a/assignment5/data/desk/DSC02640.JPG b/assignment5/data/desk/DSC02640.JPG
new file mode 100644
index 0000000..2096656
Binary files /dev/null and b/assignment5/data/desk/DSC02640.JPG differ
diff --git a/assignment5/data/desk/DSC02641.JPG b/assignment5/data/desk/DSC02641.JPG
new file mode 100644
index 0000000..919788d
Binary files /dev/null and b/assignment5/data/desk/DSC02641.JPG differ
diff --git a/assignment5/data/desk/DSC02642.JPG b/assignment5/data/desk/DSC02642.JPG
new file mode 100644
index 0000000..963bbba
Binary files /dev/null and b/assignment5/data/desk/DSC02642.JPG differ
diff --git a/assignment5/data/disparity/cporta_left.png b/assignment5/data/disparity/cporta_left.png
new file mode 100644
index 0000000..e4376fe
Binary files /dev/null and b/assignment5/data/disparity/cporta_left.png differ
diff --git a/assignment5/data/disparity/cporta_right.png b/assignment5/data/disparity/cporta_right.png
new file mode 100644
index 0000000..032c5ca
Binary files /dev/null and b/assignment5/data/disparity/cporta_right.png differ
diff --git a/assignment5/data/disparity/office2_left.png b/assignment5/data/disparity/office2_left.png
new file mode 100644
index 0000000..baebf37
Binary files /dev/null and b/assignment5/data/disparity/office2_left.png differ
diff --git a/assignment5/data/disparity/office2_right.png b/assignment5/data/disparity/office2_right.png
new file mode 100644
index 0000000..c910d8f
Binary files /dev/null and b/assignment5/data/disparity/office2_right.png differ
diff --git a/assignment5/data/disparity/office_left.png b/assignment5/data/disparity/office_left.png
new file mode 100644
index 0000000..602295d
Binary files /dev/null and b/assignment5/data/disparity/office_left.png differ
diff --git a/assignment5/data/disparity/office_right.png b/assignment5/data/disparity/office_right.png
new file mode 100644
index 0000000..5e9d266
Binary files /dev/null and b/assignment5/data/disparity/office_right.png differ
diff --git a/assignment5/data/epipolar/house1.jpg b/assignment5/data/epipolar/house1.jpg
new file mode 100644
index 0000000..059a8f9
Binary
files /dev/null and b/assignment5/data/epipolar/house1.jpg differ diff --git a/assignment5/data/epipolar/house1_camera.txt b/assignment5/data/epipolar/house1_camera.txt new file mode 100644 index 0000000..7f3c879 --- /dev/null +++ b/assignment5/data/epipolar/house1_camera.txt @@ -0,0 +1,3 @@ + 1.6108033e+001 1.3704159e+001 -6.7351564e+001 -1.8838024e+002 + 8.2886212e-001 -6.1257005e+001 -2.7985739e+001 -7.4190016e+000 + 1.6739784e-001 -4.5720139e-002 -8.4811075e-002 5.6548906e-001 diff --git a/assignment5/data/epipolar/house2.jpg b/assignment5/data/epipolar/house2.jpg new file mode 100644 index 0000000..b4a9b0c Binary files /dev/null and b/assignment5/data/epipolar/house2.jpg differ diff --git a/assignment5/data/epipolar/house2_camera.txt b/assignment5/data/epipolar/house2_camera.txt new file mode 100644 index 0000000..aff2213 --- /dev/null +++ b/assignment5/data/epipolar/house2_camera.txt @@ -0,0 +1,3 @@ + 1.0571624e+001 4.0812730e+000 -2.2538413e+001 -5.9593366e+001 + 3.1827253e-001 -2.1616617e+001 -9.8820962e+000 -2.7146868e+000 + 6.1142503e-002 -2.0656640e-002 -2.0701037e-002 2.5211789e-001 diff --git a/assignment5/data/epipolar/house_fundamental.txt b/assignment5/data/epipolar/house_fundamental.txt new file mode 100644 index 0000000..2a53426 --- /dev/null +++ b/assignment5/data/epipolar/house_fundamental.txt @@ -0,0 +1,3 @@ +-0.000000885211824 -0.000005615918803 0.001943109518320 +0.000009392818702 0.000000616883199 -0.012006630150442 +-0.001203084137613 0.011037006977740 -0.085317335867129 diff --git a/assignment5/data/epipolar/house_matches.txt b/assignment5/data/epipolar/house_matches.txt new file mode 100644 index 0000000..840f8b7 --- /dev/null +++ b/assignment5/data/epipolar/house_matches.txt @@ -0,0 +1,168 @@ + 1.7918200e+002 1.4480400e+002 1.9506100e+002 2.0622700e+002 + 6.1975500e+001 2.5237200e+002 3.8994100e+001 2.3608350e+002 + 3.6931350e+001 1.7577400e+002 4.9915800e+001 1.6315300e+002 + 2.5288250e+002 1.7216700e+002 2.2016750e+002 1.7711100e+002 + 3.2102350e+002 1.4967350e+002 3.0108200e+002 1.5957050e+002 + 2.3981000e+002 2.3064950e+002 2.1114250e+002 2.3486800e+002 + 2.8553800e+002 1.8735650e+002 2.4999600e+002 1.9598200e+002 + 2.8752550e+002 2.1648500e+002 1.7304900e+002 2.4019300e+002 + 8.3516000e+001 1.8413600e+002 8.7022000e+001 1.7447850e+002 + 2.4626850e+002 1.1257100e+002 2.1107700e+002 1.1559400e+002 + 2.8051150e+002 2.2442250e+002 2.4488300e+002 2.3413700e+002 + 2.9222950e+002 1.8776050e+002 2.6047650e+002 1.9700200e+002 + 8.0361000e+001 2.3195900e+002 4.4816200e+001 1.5298600e+002 + 1.3405950e+002 2.1506400e+002 1.1857700e+002 2.0863650e+002 + 1.0150850e+002 1.1811850e+002 1.1154700e+002 1.1312950e+002 + 1.1176850e+002 1.9390400e+002 1.0818650e+002 1.8639750e+002 + 3.1303750e+002 6.3000500e+001 2.8886000e+002 6.6640500e+001 + 3.2170900e+001 2.6592200e+002 1.2461050e+001 2.4410900e+002 + 3.1583150e+002 1.3711850e+002 2.9242350e+002 1.4630450e+002 + 1.2255200e+002 1.5963750e+002 1.2642000e+002 1.5459400e+002 + 2.3903100e+002 1.1067800e+002 2.2058950e+002 1.1328800e+002 + 2.8752550e+002 2.1648500e+002 2.5551700e+002 2.2603200e+002 + 3.1789700e+002 1.2829450e+002 2.9548050e+002 1.3657600e+002 + 6.8518500e+001 1.9854650e+002 6.8640000e+001 1.8656350e+002 + 3.0756150e+002 2.5994000e+002 2.6963400e+002 2.7415950e+002 + 3.0108400e+002 1.7968850e+002 2.7227750e+002 1.9002000e+002 + 2.6805100e+002 2.3139700e+002 1.2610700e+002 1.2922500e+002 + 3.4756300e+002 1.7979550e+002 1.8291800e+002 1.8259850e+002 + 2.8527600e+002 2.1764500e+002 2.5241150e+002 2.2791300e+002 + 
3.5513400e+002 1.6721850e+002 3.6814550e+002 1.8297050e+002 + 1.8066250e+002 2.2892400e+002 7.1144500e+001 1.4490950e+002 + 1.8062300e+002 1.9588000e+002 1.6667450e+002 1.9453550e+002 + 2.1837400e+002 2.0376550e+002 1.6342400e+002 1.5180550e+002 + 1.5030000e+002 2.0854100e+002 1.8265850e+002 1.7396650e+002 + 3.2523350e+002 1.4903450e+002 3.0816400e+002 1.6008600e+002 + 2.8740050e+002 2.1190450e+002 2.2859100e+002 1.2327800e+002 + 1.8387500e+002 8.6641500e+001 1.8093250e+002 8.6214500e+001 + 2.4990350e+002 1.1365050e+002 1.2886550e+002 1.3049800e+002 + 1.0095700e+002 1.3465200e+002 1.1176400e+002 1.2888750e+002 + 5.6088000e+001 1.5065400e+002 2.1351100e+002 1.8796300e+002 + 4.2722000e+001 1.7362150e+002 5.5217000e+001 1.6155800e+002 + 2.4575550e+002 1.7222450e+002 1.4366850e+002 6.6007500e+001 + 3.6407250e+002 1.9206950e+002 3.6692550e+002 2.1063000e+002 + 4.1618200e+001 1.7130050e+002 5.5475000e+001 1.5961500e+002 + 2.6041450e+002 2.1894350e+002 2.2666700e+002 2.2586250e+002 + 1.4998800e+002 1.0798350e+002 1.4855600e+002 1.0600400e+002 + 2.0334150e+002 5.3067500e+001 2.0399900e+002 5.3458000e+001 + 9.4893500e+001 1.2663100e+002 1.0741250e+002 1.2088600e+002 + 1.5687100e+002 2.4257150e+002 1.2788050e+002 2.3708150e+002 + 6.1975500e+001 2.5237200e+002 1.0978200e+002 1.0504350e+002 + 2.4169600e+002 1.1238700e+002 2.2238100e+002 1.1529450e+002 + 1.5687100e+002 2.4257150e+002 2.2003500e+002 1.8108050e+002 + 2.8553800e+002 1.8735650e+002 1.6053100e+002 2.2765300e+002 + 2.6596500e+002 2.4005550e+002 2.3147100e+002 2.4844750e+002 + 2.4990350e+002 1.1365050e+002 2.2924850e+002 1.1702150e+002 + 1.6087250e+002 1.4599600e+002 1.5155750e+002 1.4373600e+002 + 2.4626850e+002 1.1257100e+002 2.2645100e+002 1.1562050e+002 + 1.5455250e+002 2.1040350e+002 1.8288300e+002 1.8439900e+002 + 1.0137850e+002 1.3591000e+002 1.8241150e+002 1.5931950e+002 + 1.4433350e+002 2.0504300e+002 1.3153400e+002 1.9987250e+002 + 2.1356300e+002 2.1005050e+002 8.1403000e+001 1.4930350e+002 + 3.0690550e+002 1.3915100e+002 1.6471200e+002 1.4424850e+002 + 1.7242650e+002 2.2057000e+002 1.5083300e+002 2.1785750e+002 + 2.5155300e+002 2.1905500e+002 2.1929050e+002 2.2458700e+002 + 3.4451900e+001 2.2563950e+002 2.8594550e+001 2.0850100e+002 + 1.5030000e+002 2.0854100e+002 1.3601750e+002 2.0423250e+002 + 1.8066250e+002 2.2892400e+002 1.5533050e+002 2.2653700e+002 + 2.3232950e+002 1.1404500e+002 2.1404300e+002 1.1640950e+002 + 2.1356300e+002 2.1005050e+002 1.9165250e+002 2.1161750e+002 + 3.2473050e+002 2.4401900e+002 2.9429500e+002 2.6040700e+002 + 1.5455250e+002 2.1040350e+002 1.3849450e+002 2.0611350e+002 + 2.5407600e+002 2.3183650e+002 2.2241750e+002 2.3866850e+002 + 1.7747850e+002 1.5331750e+002 1.0307000e+002 1.8974250e+002 + 1.1764400e+002 1.3685650e+002 1.2326950e+002 1.3158800e+002 + 3.0529400e+002 1.9526650e+002 1.5803850e+002 2.2521350e+002 + 3.2523350e+002 1.4903450e+002 1.8651150e+002 4.4059250e+001 + 2.9683100e+002 2.0801450e+002 2.6987100e+002 2.1891300e+002 + 6.2727000e+001 1.6241450e+002 7.7038000e+001 1.5270200e+002 + 9.4893500e+001 1.2663100e+002 2.2716350e+002 1.9914500e+002 + 6.2285000e+001 1.8180850e+002 6.9213000e+001 1.7063550e+002 + 3.0235850e+002 1.5133850e+002 2.7282300e+002 1.6042950e+002 + 2.3903100e+002 1.1067800e+002 7.9453000e+001 1.7107700e+002 + 3.4732250e+002 1.8414400e+002 3.5243400e+002 2.0054950e+002 + 2.1410550e+002 2.0812100e+002 1.9209200e+002 2.1014500e+002 + 2.9651650e+002 1.7797850e+002 2.6587100e+002 1.8715900e+002 + 3.0690550e+002 1.3915100e+002 2.7905500e+002 1.4664550e+002 + 3.0755450e+002 
1.5028450e+002 2.8154800e+002 1.5912800e+002 + 7.6427500e+001 1.8039550e+002 8.2665500e+001 1.7071850e+002 + 2.3471950e+002 1.1491450e+002 2.1604250e+002 1.1743850e+002 + 2.4169600e+002 1.1238700e+002 6.7750500e+001 1.9952400e+002 + 3.0755450e+002 1.5028450e+002 2.3814250e+002 2.5626200e+002 + 3.1365900e+002 2.0281000e+002 2.9498900e+002 2.1596750e+002 + 2.1411400e+002 1.9151850e+002 1.9161650e+002 1.9293450e+002 + 2.8811050e+002 1.9081650e+002 2.5483200e+002 1.9954550e+002 + 1.2948250e+002 2.2789950e+002 1.0891000e+002 2.2010300e+002 + 8.3516000e+001 1.8413600e+002 1.9472300e+002 1.9885950e+002 + 7.8119000e+001 1.7241850e+002 8.7477000e+001 1.6300700e+002 + 2.9651650e+002 1.7797850e+002 1.8210000e+002 4.8681700e+001 + 1.2255200e+002 1.5963750e+002 1.1189250e+002 1.3000250e+002 + 3.5513400e+002 1.6721850e+002 2.3805150e+002 1.2053600e+002 + 3.2596550e+002 1.5994300e+002 3.1038750e+002 1.7185400e+002 + 1.0853300e+002 1.6344500e+002 1.1633200e+002 1.5656000e+002 + 3.6180050e+002 2.0686900e+002 7.2610500e+001 1.5987150e+002 + 2.0012100e+002 4.5417000e+001 2.0097650e+002 4.5859950e+001 + 1.8665100e+002 7.6156000e+001 1.8557650e+002 7.6297500e+001 + 3.0668800e+002 1.5394750e+002 2.7869700e+002 1.6339550e+002 + 3.3010150e+002 1.5469900e+002 3.1804750e+002 1.6654000e+002 + 3.2102350e+002 1.4967350e+002 1.5227100e+002 1.8614150e+002 + 2.8384950e+001 1.8400600e+002 3.9704350e+001 1.7010550e+002 + 2.9683100e+002 2.0801450e+002 2.2525600e+002 1.9041800e+002 + 6.2074500e+001 2.3851850e+002 4.5835800e+001 2.2259600e+002 + 1.5179600e+002 1.6906650e+002 1.8530350e+002 1.6236650e+002 + 3.3010150e+002 1.5469900e+002 7.0637500e+001 1.5868400e+002 + 1.9101900e+002 2.0624350e+002 1.7409600e+002 2.0558000e+002 + 1.4433350e+002 2.0504300e+002 8.5874500e+001 1.5348500e+002 + 3.2180250e+002 1.3722050e+002 3.0240700e+002 1.4650100e+002 + 3.6793900e+002 2.2950150e+002 3.5223750e+002 2.5125600e+002 + 3.5791800e+002 1.9988800e+002 3.5591800e+002 2.1845550e+002 + 1.9674400e+002 2.0645350e+002 1.7915100e+002 2.0655600e+002 + 3.1816550e+002 1.1693600e+002 2.9533550e+002 1.2460750e+002 + 3.2399950e+002 1.8537150e+002 3.1067200e+002 1.9826250e+002 + 1.1153800e+001 1.5971450e+002 1.7628550e+002 1.6593900e+002 + 3.3210650e+002 1.4851150e+002 3.1965600e+002 1.5978300e+002 + 2.7584500e+002 8.8974500e+001 2.7500850e+002 9.3037500e+001 + 4.1915300e+001 1.8043400e+002 4.3957650e+001 1.6738150e+002 + 3.2624500e+002 1.3943600e+002 3.1023500e+002 1.4940550e+002 + 3.1224400e+002 1.3321800e+002 2.8568650e+002 1.4196700e+002 + 2.9726900e+002 1.9775700e+002 2.6840300e+002 2.0876500e+002 + 5.6088000e+001 1.5065400e+002 7.6481000e+001 1.4130750e+002 + 1.9101900e+002 2.0624350e+002 1.0835400e+002 1.7635550e+002 + 3.0365650e+002 1.5001600e+002 2.7446550e+002 1.5841800e+002 + 1.7912700e+002 1.5299450e+002 2.3346650e+002 1.4753300e+002 + 2.9222950e+002 1.8776050e+002 5.8090000e+001 1.6222400e+002 + 3.2479850e+002 1.3931850e+002 3.0716100e+002 1.4911300e+002 + 1.3144600e+002 6.8035000e+001 3.5574450e+002 2.1173050e+002 + 3.1789700e+002 1.2829450e+002 2.1445850e+002 1.7648300e+002 + 8.0361000e+001 2.3195900e+002 6.4276000e+001 2.1868200e+002 + 3.6180050e+002 2.0686900e+002 3.5478100e+002 2.2578000e+002 + 9.6984500e+001 1.3567950e+002 1.0974850e+002 1.2934550e+002 + 3.4747600e+002 2.2390200e+002 3.3133850e+002 2.4183450e+002 + 1.1153800e+001 1.5971450e+002 3.5802700e+001 1.4647600e+002 + 2.7404450e+002 2.4653600e+002 1.6532900e+002 1.5169450e+002 + 3.0529400e+002 1.9526650e+002 2.8159550e+002 2.0687550e+002 + 5.7455500e+001 1.6895950e+002 
2.1473250e+002 1.0983800e+002 + 1.2769600e+002 1.9505900e+002 1.2123950e+002 1.8901250e+002 + 2.6053200e+001 1.6551050e+002 1.9648950e+002 2.0448800e+002 + 2.8740050e+002 2.1190450e+002 2.5464750e+002 2.2193400e+002 + 2.5593700e+002 2.2407050e+002 2.2306950e+002 2.3054600e+002 + 2.3471950e+002 1.1491450e+002 2.1934950e+002 2.0089500e+002 + 3.1959300e+002 1.5739450e+002 3.0088450e+002 1.6808900e+002 + 1.9238350e+002 1.6535150e+002 1.0695850e+002 1.1564550e+002 + 2.4247550e+002 2.3166200e+002 2.1305000e+002 2.3636450e+002 + 3.0218800e+002 1.7132600e+002 2.7425350e+002 1.8068600e+002 + 1.7601850e+002 1.4352700e+002 1.6290100e+002 1.4236350e+002 + 3.0855650e+002 1.4262350e+002 2.8091400e+002 1.5121450e+002 + 3.1638550e+002 1.5340300e+002 2.9527450e+002 1.6362750e+002 + 2.5261200e+002 1.2029950e+002 2.3018150e+002 1.2391750e+002 + 1.7568950e+002 2.2714250e+002 1.5119350e+002 2.2430800e+002 + 1.5179600e+002 1.6906650e+002 1.4629400e+002 1.6549950e+002 + 1.9238350e+002 1.6535150e+002 1.7466150e+002 1.6517000e+002 + 3.1037750e+002 2.5751600e+002 5.8449500e+001 1.5756250e+002 + 2.6087750e+002 1.9306100e+002 1.2478950e+002 1.3764900e+002 + 7.3081000e+001 1.6900550e+002 8.4426500e+001 1.5967050e+002 + 2.8041600e+002 2.0380450e+002 2.4273700e+002 2.1295550e+002 + 3.4756300e+002 1.7979550e+002 3.5326600e+002 1.9526800e+002 + 2.5652100e+002 1.7681900e+002 2.2275200e+002 1.8204100e+002 + 3.1037750e+002 2.5751600e+002 2.7346600e+002 2.7244450e+002 + 2.6805100e+002 2.3139700e+002 2.3297150e+002 2.3981050e+002 diff --git a/assignment5/data/epipolar/house_points.txt b/assignment5/data/epipolar/house_points.txt new file mode 100644 index 0000000..67087c7 --- /dev/null +++ b/assignment5/data/epipolar/house_points.txt @@ -0,0 +1,10 @@ +1.9220093e+002 4.4911215e+001 1.9011120e+002 4.6260498e+001 +3.2319159e+002 6.4051402e+001 2.9641291e+002 6.8954121e+001 +1.3238785e+002 6.8836449e+001 1.4352955e+002 6.7162519e+001 +3.1302336e+002 1.1250000e+002 2.8566330e+002 1.2031337e+002 +1.0367757e+002 1.1010748e+002 1.1187792e+002 1.0478616e+002 +2.7713551e+002 1.7051869e+002 2.3848445e+002 1.7645023e+002 +8.7528037e+001 1.5317290e+002 1.0232271e+002 1.4479860e+002 +2.7593925e+002 2.4588318e+002 2.3967885e+002 2.5647512e+002 +3.2139720e+002 2.0999533e+002 3.1134292e+002 2.2542068e+002 +2.8132243e+002 1.9264953e+002 2.4385925e+002 1.9974106e+002 diff --git a/assignment5/instructions.pdf b/assignment5/instructions.pdf new file mode 100644 index 0000000..0d4c78d Binary files /dev/null and b/assignment5/instructions.pdf differ diff --git a/assignment5/solution.py b/assignment5/solution.py new file mode 100644 index 0000000..a661209 --- /dev/null +++ b/assignment5/solution.py @@ -0,0 +1,67 @@ +import numpy as np +import numpy.typing as npt +from matplotlib import pyplot as plt +import cv2 +import uz_framework.image as uz_image +import uz_framework.text as uz_text +import os + +############################################## +# EXCERCISE 1: Exercise 1: Image derivatives # +############################################## + +def ex1(): + one_b() + +def one_a() -> None: + """ + d = (f * px)/pz + f(T - px)/pz => d = (f * T) / pz + """ + +def one_b() -> None: + """ + Write a script that computes the disparity for a range of values of pz. Plot the values + to a figure and set the appropriate units to axes. 
Use the following parameters of
+    the system: focal length is f = 2.5mm and stereo system baseline is T = 12cm
+    """
+
+    pz_values = np.arange(0.001, 0.1, 0.001)
+    f = 0.0025
+    T = 0.12
+
+    d = (f * T) / pz_values
+
+    plt.plot(pz_values, d)
+
+    plt.xlabel('pz [m]')
+    plt.ylabel('d [m]')
+    plt.title('Disparity for a range of values of pz')
+    plt.show()
+
+def one_c() -> None:
+    """
+    In order to get a better grasp on the idea of distance and disparity, you will calculate
+    the numbers for a specific case. We will take the parameters from a specification of
+    a commercial stereo camera Bumblebee2 manufactured by the company PointGray:
+    f = 2.5mm, T = 12cm, whose image sensor has a resolution of 648×488 pixels that
+    are square and the width of a pixel is 7.4µm. We assume that there is no empty
+    space between pixels and that both cameras are completely parallel and equal. Let's
+    say that we use this system to observe a (point) object that is detected at pixel 550
+    on the x axis in the left camera and at pixel 300 in the right camera. How far is the
+    object (in meters) in this case? How far is the object if it is detected at
+    pixel 540 in the right camera? Solve this task analytically and bring your solution
+    to the presentation of the exercise.
+    """
+
+
+# ######## #
+# SOLUTION #
+# ######## #
+
+def main():
+    ex1()
+    #ex2()
+    #ex3()
+
+if __name__ == '__main__':
+    main()
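As a sanity check for one_c() (a sketch, not part of the patch; every number comes straight from the docstring above), the analytical answer follows from d = f*T/pz:

# one_c() sanity check: disparity in metres, then distance from d = f*T/pz
f = 0.0025            # focal length [m]
T = 0.12              # baseline [m]
pixel_width = 7.4e-6  # [m]

for x_right in (300, 540):
    d = (550 - x_right) * pixel_width  # disparity converted from pixels to metres
    pz = (f * T) / d
    print(f"x_right={x_right}: d={d:.6f} m, pz={pz:.3f} m")
# Expected: pz ≈ 0.162 m for pixel 300 and pz ≈ 4.054 m for pixel 540.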
+ """ + for k in ks: + k = np.flip(k) # filter2D performs correlation, so flipping is necessary + I = cv2.filter2D(I, cv2.CV_64F, k) + return I + +def transform_coloured_image_to_grayscale(image: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: + """ + Accepts float64 picture with three colour channels and returns float64 grayscale image + with one channel. + """ + grayscale_image = np.zeros(image.shape[:2]) + + for i in range(image.shape[0]): + for j in range(image.shape[1]): + grayscale_image[i, j] = (image[i, j, 0] + image[i,j, 1] + image[i, j, 2]) / 3 + + + return grayscale_image + +def invert_coloured_image_part(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]], + startx: int, endx: int, starty: int, endy: int) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]: + """ + Accepts image, starting position end end position for axes x & y. Returns whole image with inverted part. + """ + inverted_image = image.copy() + + if image.dtype.type == np.float64: + for i in range(startx, endx): + for j in range(starty, endy): + inverted_image[i, j, 0] = 1 - image[i, j, 0] + inverted_image[i, j, 1] = 1 - image[i, j, 1] + inverted_image[i, j, 2] = 1 - image[i, j, 2] + return inverted_image + elif image.dtype.type == np.uint8: + for i in range(startx, endx): + for j in range(starty, endy): + inverted_image[i, j, 0] = 255 - image[i, j, 0] + inverted_image[i, j, 1] = 255 - image[i, j, 1] + inverted_image[i, j, 2] = 255 - image[i, j, 2] + return inverted_image + + raise Exception("Unrecognized image format!") + +def invert_coloured_image(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]: + """ + Accepts image and inverts it + """ + return invert_coloured_image_part(image, 0, image.shape[0], 0, image.shape[1]) + +def calculate_best_treshold_using_otsu_method(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]) -> int: + """ + Accepts image and returns best treshold using otsu method + """ + + if image.dtype.type == np.float64: + im = image.copy() + im = im * (255.0/im.max()) + elif image.dtype.type == np.uint8: + im = image.copy() + else: + raise Exception("Unrecognized image format!") + + treshold_range = np.arange(np.max(im) + 1) + criterias = [] + + for treshold in treshold_range: + # create the thresholded image + thresholded_im = np.zeros(im.shape) + + thresholded_im[im >= treshold] = 1 + + # compute weights + nb_pixels = im.size + nb_pixels1 = np.count_nonzero(thresholded_im) + weight1 = nb_pixels1 / nb_pixels + weight0 = 1 - weight1 + + # if one the classes is empty, eg all pixels are below or above the threshold, that threshold will not be considered + # in the search for the best threshold + if weight1 == 0 or weight0 == 0: + continue + + # find all pixels belonging to each class + val_pixels1 = im[thresholded_im == 1] + val_pixels0 = im[thresholded_im == 0] + + # compute variance of these classes + var0 = np.var(val_pixels0) if len(val_pixels0) > 0 else 0 + var1 = np.var(val_pixels1) if len(val_pixels1) > 0 else 0 + + criterias.append( weight0 * var0 + weight1 * var1) + + best_threshold = treshold_range[np.argmin(criterias)] + return best_threshold + + +def get_image_bins_for_loop(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]], + number_of_bins: int) -> npt.NDArray[np.float64]: + """ + Accepts image in the float64 format or uint8, returns normailzed + image bins, histogram + """ + if image.dtype.type == np.float64 or image.dtype.type == np.uint8: + bin_restrictions = 
+def get_image_bins_for_loop(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        number_of_bins: int) -> npt.NDArray[np.float64]:
+    """
+    Accepts an image in the float64 or uint8 format, returns the normalized
+    image bins, histogram
+    """
+    if image.dtype.type == np.float64 or image.dtype.type == np.uint8:
+        bin_restrictions = np.linspace(np.min(image), np.max(image), num=number_of_bins)
+    else:
+        raise Exception("Unrecognized image format!")
+
+    bins = np.zeros(number_of_bins).astype(np.float64)
+
+    for pixel in image.reshape(-1):
+        # https://stackoverflow.com/a/16244044
+        bins[np.argmax(bin_restrictions > pixel)] += 1
+
+    return bins / np.sum(bins)
+
+
+# Much faster implementation than for loop
+def get_image_bins(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        number_of_bins: int) -> npt.NDArray[np.float64]:
+    """
+    Accepts an image in the float64 or uint8 format, returns the normalized
+    image bins, histogram
+    """
+    if image.dtype.type == np.float64 or image.dtype.type == np.uint8:
+        bins = np.linspace(np.min(image), np.max(image), num=number_of_bins)
+    else:
+        raise Exception("Unrecognized image format!")
+
+    # Put pixels into classes
+    # ex. binsize = 10 then 0.4 would map into 4
+    binarray = np.digitize(image.reshape(-1), bins).astype(np.uint8)
+
+    # Now count those values
+    binarray = np.unique(binarray, return_counts=True)
+
+    counts = binarray[1].astype(np.float64)  # Get the counts out of tuple
+
+    # Check if there is any empty bin
+    empty_bins = []
+    bins = binarray[0]
+    for i in range(1, number_of_bins + 1):
+        if i not in bins:
+            empty_bins.append(i)
+
+    # Add empty bins with zeros
+    if empty_bins != []:
+        for i in empty_bins:
+            counts = np.insert(counts, i - 1, 0)
+
+    # normalize so the result matches the docstring and the for-loop version
+    return counts / np.sum(counts)
+
+def get_image_bins_ND(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        number_of_bins: int) -> npt.NDArray[np.float64]:
+    """
+    Accepts an image in the float64 or uint8 format and the number of bins
+    Returns normalized image histogram bins
+    """
+    bs = []
+    hist = np.zeros((number_of_bins, number_of_bins, number_of_bins))
+    bins = np.linspace(np.min(image), np.max(image), num=number_of_bins)
+
+    for i in range(image.shape[2]):
+        v = image[:, :, i].reshape(-1)
+        bs.append(np.digitize(v, bins).astype(np.uint32))
+
+    for i in range(len(bs[0])):
+        hist[bs[2][i] - 1, bs[1][i] - 1, bs[0][i] - 1] += 1
+
+    return hist / np.sum(hist)
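A short sketch of the 3-D histogram helper (not part of the patch; assumes a float64 RGB image in [0, 1], and uses compare_two_histograms, which is defined just below):

import numpy as np
I = np.random.rand(32, 32, 3)             # stand-in float64 RGB image
hist = get_image_bins_ND(I, number_of_bins=8)
assert np.isclose(hist.sum(), 1.0)        # histogram is normalized
# distance of a histogram to itself should be 0
d = compare_two_histograms(hist.reshape(-1), hist.reshape(-1),
                           DistanceMeasure.hellinger_distance)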
+def get_image_bins_gradient_magnitude_and_angles(image: Union[npt.NDArray[np.float64],
+        npt.NDArray[np.uint8]]) -> npt.NDArray[np.float64]:
+    """
+    Accepts: image,
+    Returns: 1D histogram of image using gradient magnitude and gradient angles
+    Works OK on many dimensions
+    """
+    WIDTH = image.shape[0]
+    HEIGHT = image.shape[1]
+    WIDTH_8 = WIDTH // 8
+    HEIGHT_8 = HEIGHT // 8
+
+    def split_into_blocks(image):
+        blocks = []
+        for y in range(0, 7):
+            for x in range(0, 8):
+                if x == 7:
+                    blocks.append(image[y*WIDTH_8 : (y +1)* WIDTH_8, 7*HEIGHT_8 : HEIGHT].copy())  # handle the last column
+                else:
+                    blocks.append(image[y*WIDTH_8 : (y +1)* WIDTH_8, x*HEIGHT_8 : (x +1)* HEIGHT_8].copy())
+
+        for x in range(0, 7):
+            blocks.append(image[7*WIDTH_8 : WIDTH, x*HEIGHT_8 : (x +1)* HEIGHT_8].copy())  # handle the last row
+        blocks.append(image[7*WIDTH_8 : WIDTH, 7*HEIGHT_8 : HEIGHT].copy())
+        return blocks
+
+    angle_linspace = np.linspace(-np.pi, np.pi, num=8)
+    gm, da = gradient_magnitude(image, 1)
+    gm_blocks = split_into_blocks(gm)
+    da_blocks = split_into_blocks(da)
+
+    histogram = []
+    for ix in range(64):
+        # Gradient magnitude and angles
+        gm_l = gm_blocks[ix].reshape(-1)
+        da_l = da_blocks[ix].reshape(-1)
+
+        accumulator = np.zeros(8)
+        binned = np.digitize(da_l, angle_linspace)
+        for i in range(binned.size):
+            accumulator[binned[i] - 1] += gm_l[i]
+
+        # Normalize accumulator
+        histogram.extend(accumulator / np.sum(accumulator))
+
+    histogram = np.array(histogram)
+    return histogram / np.sum(histogram)
+
+def compare_two_histograms(h1: npt.NDArray[np.float64], h2: npt.NDArray[np.float64],
+        method: DistanceMeasure) -> float:
+    """
+    Accepts two histograms and a method of comparison
+    Returns the distance between them
+    """
+
+    if method == DistanceMeasure.euclidian_distance:
+        d = np.sqrt(np.sum(np.square(h1 - h2)))
+    elif method == DistanceMeasure.chi_square_distance:
+        d = 0.5 * np.sum(np.square(h1 - h2) / (h1 + h2 + np.finfo(float).eps))
+    elif method == DistanceMeasure.intersection_distance:
+        d = 1 - np.sum(np.minimum(h1, h2))
+    elif method == DistanceMeasure.hellinger_distance:
+        d = np.sqrt(0.5 * np.sum(np.square(np.sqrt(h1) - np.sqrt(h2))))
+    else:
+        raise Exception('Unsupported method chosen!')
+
+    return d.astype(float)
+
+
+def apply_mask_on_image(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        mask: npt.NDArray[np.uint8]) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts an image and applies the mask to it
+    """
+    image = image.copy()
+
+    mask = np.expand_dims(mask, axis=2)
+    image = mask * image
+
+    return image
+
+def simple_convolution(signal: npt.NDArray[np.float64], kernel: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """
+    Accepts: signal & kernel
+
+    Returns: convolved signal with a kernel
+    """
+    n_conv = signal.size - kernel.size + 1
+
+    convolved_signal = np.zeros(len(signal))
+    rev_kernel = kernel[::-1].copy()
+
+    for i in range(n_conv):
+        # indexing from i keeps the result left-aligned; offsetting by the
+        # kernel half-width would centre it instead
+        convolved_signal[i] = np.dot(signal[i: i+kernel.size], rev_kernel)
+
+    return convolved_signal
+
+def simple_convolution_improved(signal: npt.NDArray[np.float64], kernel: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """
+    Accepts: signal & kernel
+
+    Returns: convolved signal with a kernel
+    Improved method replicates the edges of the signal
+    """
+    signal_len = signal.size
+    kernel_len = kernel.size
+    signal = signal.copy()
+
+    # Calculate which values to fill in and range
+    EDGE_FRONT = signal[0]
+    EDGE_BACK = signal[-1]
+    EXTEND_RANGE = int(np.floor(kernel.size / 2))
+
+    # Prepend and append the replicated edges
+    front = [EDGE_FRONT for _ in range(EXTEND_RANGE)]
+    back = [EDGE_BACK for _ in range(EXTEND_RANGE)]
+    signal = np.insert(signal, 0, front, axis=0)
+    signal = np.append(signal, back, axis=0)
+
+    convolved_signal = np.zeros(signal_len)
+    rev_kernel = kernel[::-1].copy()
+    # use the extended length so the whole output gets filled
+    n_conv = signal.size - kernel_len + 1
+
+    for i in range(n_conv):
+        convolved_signal[i] = np.dot(signal[i: i+kernel.size], rev_kernel)
+
+    return convolved_signal
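A quick check (not part of the patch) that simple_convolution agrees with NumPy's reference implementation; the result is left-aligned in a full-length output, so only the valid part is compared:

import numpy as np
signal = np.array([0., 1., 2., 1., 0., 0., 1.])
kernel = np.array([0.1, 0.2, 0.7])   # deliberately asymmetric to exercise the flip
ours = simple_convolution(signal, kernel)
ref = np.convolve(signal, kernel, mode='valid')
assert np.allclose(ours[:len(ref)], ref)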
+def get_gaussian_kernel(sigma: float) -> npt.NDArray[np.float64]:
+    """
+    Accepts sigma
+    Returns gaussian kernel
+    """
+    # https://github.com/mikepound/convolve/blob/master/run.gaussian.py
+    kernel_size = int(2 * np.ceil(3*sigma) + 1)
+    k_min_max = np.ceil(3*sigma)
+    k_interval = np.arange(-k_min_max, k_min_max + 1.)
+
+    result = (1. / (np.sqrt(2. * np.pi) * sigma)) * np.exp(-(np.square(k_interval)) / (2.*np.square(sigma)))
+
+    assert(kernel_size == len(result))
+
+    return result / np.sum(result)
+
+def sharpen_image(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sharpen_factor=1.0) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image & sharpen factor
+
+    Returns: sharpened image
+    https://blog.demofox.org/2022/02/26/image-sharpening-convolution-kernels/ <-- sharpening kernel, but also on slides
+    good explanation
+    """
+    sharpened_image = image.copy()
+
+    KERNEL = np.array([[-1, -1, -1],
+                       [-1, 17, -1],
+                       [-1, -1, -1]]) * 1./9. * sharpen_factor
+
+    if image.dtype.type == np.float64:
+        sharpened_image = cv2.filter2D(sharpened_image, cv2.CV_64F, KERNEL)
+    elif image.dtype.type == np.uint8:
+        sharpened_image = cv2.filter2D(sharpened_image, cv2.CV_8U, KERNEL)
+
+    return sharpened_image
+
+def gaussfilter2D(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sigma: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image, sigma
+    Applies Gaussian smoothing to the image (separable filtering in x and y)
+    returns: filtered_image
+    """
+    filtered_image = image.copy()
+    kernel = np.array([get_gaussian_kernel(sigma)])
+    filtered_image = cv2.filter2D(filtered_image, cv2.CV_64F, kernel)
+    filtered_image = cv2.filter2D(filtered_image, cv2.CV_64F, kernel.T)
+
+    return filtered_image
+
+def gaussdx(sigma: float) -> npt.NDArray[np.float64]:
+    """
+    Accepts sigma
+    Returns the derivative of a gaussian kernel
+    """
+    kernel_size = int(2 * np.ceil(3*sigma) + 1)
+    k_min_max = np.ceil(3*sigma)
+    k_interval = np.arange(-k_min_max, k_min_max + 1.)
+
+    result = - (1. / (np.sqrt(2. * np.pi) * np.power(sigma, 3))) * k_interval * np.exp(-(np.square(k_interval)) / (2.*np.square(sigma)))
+
+    assert(kernel_size == len(result))
+
+    return result / np.sum(np.abs(result))
+
+
+def simple_median(signal: npt.NDArray[np.float64], width: int) -> npt.NDArray[np.float64]:
+    """
+    Accepts: signal & width
+    returns signal improved using median filter
+    """
+    if width % 2 == 0:
+        raise Exception('Filter width must be odd')
+
+    signal = signal.copy()
+    for i in range(len(signal) - int(np.ceil(width/2))):
+        middle_element = int(i + np.floor(width/2))
+        signal[middle_element] = np.median(signal[i:i+width])
+    return signal
+
+def apply_median_method_2D(image: Union[npt.NDArray[np.float64],
+        npt.NDArray[np.uint8]], width: int) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image & filter width
+    returns: image with median filter applied
+    """
+    if width % 2 == 0:
+        raise Exception('Filter width must be odd')
+
+    image = image.copy()
+    W_HALF = int(np.floor(width/2))
+    padded_image = np.pad(image, W_HALF, mode='edge')
+    IMAGE_HEIGHT = image.shape[0]  # y
+    IMAGE_WIDTH = image.shape[1]  # x
+
+    # Thanks to the padding every pixel, including the borders, can be filtered
+    for y in range(IMAGE_HEIGHT):
+        for x in range(IMAGE_WIDTH):
+            # padded_image[y:y+width, x:x+width] is the window centred on (y, x)
+            window = padded_image[y:y + width, x:x + width]
+            if image.dtype.type == np.uint8:
+                image[y][x] = int(np.median(window))
+            else:
+                image[y][x] = np.median(window)  # median, not mean - this is a median filter
+    return image
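A minimal sketch of the median filter on synthetic salt noise (not part of the patch; the noise density is an arbitrary choice):

import numpy as np
rng = np.random.default_rng(0)
I = np.zeros((32, 32))
I[rng.random(I.shape) < 0.05] = 1.0      # ~5% isolated salt pixels
cleaned = apply_median_method_2D(I, width=3)
# a 3x3 median removes almost all isolated outliers, so cleaned is near-zero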
+def filter_laplace(image: Union[npt.NDArray[np.float64],
+        npt.NDArray[np.uint8]], sigma: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image & sigma
+    returns: image with laplace filter applied
+    """
+    # Prepare unit impulse and gauss kernel
+    unit_impulse = np.zeros((1, 2 * int(np.ceil(3*sigma)) + 1))
+    unit_impulse[0][int(np.ceil(unit_impulse.size / 2)) - 1] = 1
+    gauss_kernel = np.array([get_gaussian_kernel(sigma)])
+    assert(len(gauss_kernel[0]) == len(unit_impulse[0]))
+
+    laplacian_filter = unit_impulse - gauss_kernel[0]
+
+    # Now apply laplacian filter
+    applied_by_x = cv2.filter2D(image, -1, laplacian_filter)
+    applied_by_y = cv2.filter2D(applied_by_x, -1, laplacian_filter.T)
+
+    return applied_by_y
+
+def gauss_noise(I, magnitude=.1) -> npt.NDArray[np.float64]:
+    """
+    Accepts: image & magnitude of noise
+    Returns: image with gaussian noise applied
+    """
+    I = I.copy()
+
+    return I + np.random.normal(size=I.shape) * magnitude
+
+
+def sp_noise(I, percent=.1) -> npt.NDArray[np.float64]:
+    """
+    Accepts: image & percent of corrupted pixels
+    Returns: image with salt and pepper noise applied
+    """
+    res = I.copy()
+    res[np.random.rand(I.shape[0], I.shape[1]) < percent / 2] = 1
+    res[np.random.rand(I.shape[0], I.shape[1]) < percent / 2] = 0
+
+    return res
+
+def sp_noise1D(signal, percent=.1) -> npt.NDArray[np.float64]:
+    """
+    Accepts: signal & percent
+    Returns: signal with salt and pepper noise applied
+    """
+    signal = signal.copy()
+    signal[np.random.rand(signal.shape[0]) < percent / 2] = 2
+    signal[np.random.rand(signal.shape[0]) < percent / 2] = 1
+    signal[np.random.rand(signal.shape[0]) < percent / 2] = 4
+    signal[np.random.rand(signal.shape[0]) < percent / 2] = 0.4
+    return signal
+
+def sum_two_grayscale_images(image_a: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        image_b: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image_a, image_b
+    Returns: the average (image_a + image_b) / 2
+    """
+    # Merge image_a and image_b
+    return (image_a + image_b) / 2
+
+def generate_dirac_impulse(size: int) -> npt.NDArray[np.float64]:
+    """
+    Accepts: size
+    Returns: dirac impulse of size
+    """
+
+    dirac_impulse = np.zeros((size, size))
+    dirac_impulse[int(size/2), int(size/2)] = 1
+
+    return dirac_impulse
+
+def derive_image_by_x(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sigma: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image, sigma
+    Returns: partial derivative of the image with respect to x
+    """
+    image = image.copy()
+    gaussd = np.array([gaussdx(sigma)])
+    gauss = np.array([get_gaussian_kernel(sigma)])
+    gaussd = np.flip(gaussd, axis=1)
+
+    applied_by_y = cv2.filter2D(image, cv2.CV_64F, gauss.T)
+    applied_by_x = cv2.filter2D(applied_by_y, cv2.CV_64F, gaussd)
+
+    return applied_by_x
+
+def derive_image_by_y(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sigma: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image, sigma
+    Returns: partial derivative of the image with respect to y
+    """
+    image = image.copy()
+    gaussd = np.array([gaussdx(sigma)])
+    gauss = np.array([get_gaussian_kernel(sigma)])
+    gaussd = np.flip(gaussd, axis=1)
+
+    applied_by_x = cv2.filter2D(image, cv2.CV_64F, gauss)
+    applied_by_y = cv2.filter2D(applied_by_x, cv2.CV_64F, gaussd.T)
+
+    return applied_by_y
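The classic impulse-response check for the derivative filters above (a sketch, not part of the patch; sigma and the impulse size are arbitrary):

impulse = generate_dirac_impulse(51)
ix = derive_image_by_x(impulse, 3.0)
iy = derive_image_by_y(impulse, 3.0)
# the responses visualize the separable Gaussian-derivative kernels,
# antisymmetric along x and y respectively
plt.subplot(1, 2, 1); plt.imshow(ix); plt.title('Ix')
plt.subplot(1, 2, 2); plt.imshow(iy); plt.title('Iy')
plt.show()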
+def derive_image_first_order(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sigma: float) -> tuple[Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]]:
+    """
+    Accepts: image, sigma
+    returns: image derived by x, image derived by y
+    """
+    return derive_image_by_x(image, sigma), derive_image_by_y(image, sigma)
+
+def derive_image_second_order(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sigma: float) -> tuple[tuple[Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]], tuple[Union[npt.NDArray[np.float64],
+        npt.NDArray[np.uint8]], Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]]]:
+    """
+    Accepts: image, sigma
+    Returns: (Ixx, Ixy), (Iyx, Iyy)
+    """
+    derived_by_x = derive_image_by_x(image, sigma)
+    derived_by_y = derive_image_by_y(image, sigma)
+
+    return derive_image_first_order(derived_by_x, sigma), derive_image_first_order(derived_by_y, sigma)
+
+def gradient_magnitude(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sigma: float) -> tuple[Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]]:
+    """
+    Accepts: image, sigma
+    Returns: gradient magnitude of image and derivative angles
+    """
+    Ix, Iy = derive_image_first_order(image, sigma)
+    return np.sqrt(Ix**2 + Iy**2), np.arctan2(Iy, Ix)
+
+
+def find_edges_primitive(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sigma: float, theta: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image, sigma & theta
+    Returns: image with edges
+    """
+    derivative_magnitude, _ = gradient_magnitude(image, sigma)
+
+    binary_mask = np.zeros_like(derivative_magnitude)
+    binary_mask[(derivative_magnitude >= theta)] = 1
+
+    return binary_mask
+
+
+def find_edges_nms(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sigma: float, theta: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image, sigma & theta
+    Returns: image with edges
+    """
+    step_size = np.pi/8
+
+    def get_gradient_orientation(angle: float) -> tuple[tuple[int, int], tuple[int, int]]:
+        """
+        Accepts: angle
+        Returns: indexes of gradient orientation (x, y), (x, y)
+        Basically walks around the unit circle and returns the indexes of the closest angle
+        """
+        angle = angle % np.pi
+
+        for i in range(0, 8):
+            if angle >= i * step_size and angle <= (i+1) * step_size:
+                if i == 0 or i == 7:
+                    return (0, 1), (0, -1)
+                elif i == 1 or i == 2:
+                    return (-1, -1), (1, 1)
+                elif i == 3 or i == 4:
+                    return (-1, 0), (1, 0)
+                elif i == 5 or i == 6:
+                    return (-1, 1), (1, -1)
+        raise ValueError(f"Angle {angle} is not in range")
+
+    derivative_magnitude, derivative_angles = gradient_magnitude(image, sigma)
+    reduced_magnitude = derivative_magnitude.copy()
+    # Set low elements to 0
+    reduced_magnitude[(derivative_magnitude <= theta)] = 0
+
+    for y in range(1, reduced_magnitude.shape[0]-1):
+        for x in range(1, reduced_magnitude.shape[1]-1):
+            gp1, gp2 = get_gradient_orientation(derivative_angles[y, x])
+
+            if reduced_magnitude[y, x] < reduced_magnitude[y+gp1[0], x+gp1[1]] or reduced_magnitude[y, x] < reduced_magnitude[y+gp2[0], x+gp2[1]]:
+                reduced_magnitude[y, x] = 0
+
+    return reduced_magnitude
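A small sketch of the edge pipeline on a synthetic step edge (not part of the patch; sigma and theta are arbitrary illustrative values):

import numpy as np
I = np.zeros((64, 64))
I[:, 32:] = 1.0                          # vertical step edge
mag, ang = gradient_magnitude(I, sigma=1.0)
edges = find_edges_nms(I, sigma=1.0, theta=0.1)
# NMS thins the blurred step response down to a roughly one-pixel line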
+def find_edges_canny(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        sigma: float, theta: float, t_low: float, t_high: float) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: image, sigma, theta, t_low, t_high
+    Returns: image with edges
+    """
+
+    image_nms = find_edges_nms(image, sigma, theta)
+    nms_magnitudes = image_nms.copy()  # keep the magnitudes for the t_high hysteresis check
+    image_nms[image_nms < t_low] = 0
+    image_nms[image_nms >= t_low] = 1
+
+    image_nms = image_nms.astype(np.uint8)
+    new_image = np.zeros_like(image_nms)
+
+    num_labels, labels, _, _ = cv2.connectedComponentsWithStats(image_nms, connectivity=4, ltype=cv2.CV_32S)
+
+    for i in range(1, num_labels):
+        # keep a connected component only if at least one of its original magnitudes
+        # exceeds t_high (comparing the already-binarized image to t_high, as before,
+        # could never trigger)
+        if np.any(nms_magnitudes[labels == i] > t_high):
+            new_image[labels == i] = 1
+
+    return new_image
+
+
+def hough_transform_a_point(x: int, y: int, n_bins: int) -> npt.NDArray[np.float64]:
+    """
+    Accepts: x, y, n_bins
+    Returns: x, y transformed into hough space
+    """
+    theta_values = np.linspace(-np.pi/2, np.pi, n_bins)
+
+    cos_theta = np.cos(theta_values)
+    sin_theta = np.sin(theta_values)
+    accumulator = np.zeros((n_bins, n_bins))
+
+    for i in range(n_bins):
+        r = np.round(x * cos_theta[i] + y * sin_theta[i]) + n_bins/2
+        accumulator[int(r), i] += 1
+
+    return accumulator
+
+def hough_transform_a_circle(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        edged_image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        r_start: int, r_end: int, treshold: float) -> npt.NDArray[np.float64]:
+    """
+    Accepts: image, edged image, r_start, r_end, treshold
+    Returns: hough space accumulator of shape (height, width, r_end - r_start)
+    """
+
+    edged_image = edged_image.copy()
+    edged_image[edged_image < treshold] = 0
+
+    accumulator = np.zeros((edged_image.shape[0], edged_image.shape[1], r_end - r_start))
+    indices = np.argwhere(edged_image)
+
+    gm, ga = gradient_magnitude(image, 1)
+
+    # Loop through all nonzero pixels above treshold
+    for i in tqdm(range(len(indices)), desc='Hough transform'):
+        for r_ix in range(0, r_end - r_start):
+            r = r_start + r_ix  # use the actual radius, not the bin index
+            y, x = indices[i]
+            a = x - r * np.cos(ga[y, x])
+            b = y - r * np.sin(ga[y, x])
+            # accumulator is indexed as (row, column, radius bin)
+            if 0 <= int(b) < accumulator.shape[0] and 0 <= int(a) < accumulator.shape[1]:
+                accumulator[int(b), int(a), r_ix] += 1
+    return accumulator
+
+
+def hough_find_lines(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]],
+        n_bins_theta: int, n_bins_rho: int, treshold: float) -> npt.NDArray[np.uint64]:
+    """
+    Accepts: bw image with lines, n_bins_theta, n_bins_rho, treshold
+    Returns: image points above treshold transformed into hough space
+    """
+
+    image = image.copy()
+    image[image < treshold] = 0
+    theta_values = np.linspace(-np.pi/2, np.pi/2, n_bins_theta)
+
+    D = np.sqrt(image.shape[0]**2 + image.shape[1]**2)
+    rho_values = np.linspace(-D, D, n_bins_rho)
+    accumulator = np.zeros((n_bins_rho, n_bins_theta), dtype=np.uint64)
+
+    cos_precalculated = np.cos(theta_values)
+    sin_precalculated = np.sin(theta_values)
+
+    y_s, x_s = np.nonzero(image)
+
+    # Loop through all nonzero pixels above treshold
+    for i in tqdm(range(len(y_s)), desc='Hough transform'):
+        x = x_s[i]
+        y = y_s[i]
+
+        # Precalculate rhos
+        rhos = np.round(x * cos_precalculated + y * sin_precalculated).astype(np.int64)
+
+        # Bin the rhos
+        binned = np.digitize(rhos, rho_values) - 1
+
+        # Add to accumulator
+        for theta in range(n_bins_theta):
+            accumulator[binned[theta], theta] += 1
+
+    return accumulator
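A sketch of the line-detection loop on a synthetic image (not part of the patch; bin counts are arbitrary, and nonmaxima_suppression_box, retrieve_hough_pairs and get_line_to_plot are defined further down):

import numpy as np
edges = np.zeros((100, 100))
edges[10:90, 20] = 1                      # vertical line at x = 20
acc = hough_find_lines(edges, n_bins_theta=180, n_bins_rho=180, treshold=0.5)
acc = nonmaxima_suppression_box(acc)
pairs = retrieve_hough_pairs(edges, acc, int(acc.max()), 180, 180)
rho, theta = pairs[0]                     # strongest (rho, theta) pair
xs, ys = get_line_to_plot(rho, theta, 100, 100)
plt.imshow(edges, cmap='gray')
plt.plot(xs, ys, 'r', linewidth=0.7)
plt.show()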
+def hough_find_lines_i(image_with_lines: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]], gradient_angles: npt.NDArray[np.float64],
+        gradient_magnitude: npt.NDArray[np.float64],
+        n_bins_theta: int, n_bins_rho: int, treshold: float) -> Union[npt.NDArray[np.uint64], npt.NDArray[np.float64]]:
+    """
+    Accepts: bw image with lines, gradient angles & magnitudes, n_bins_theta, n_bins_rho, treshold
+    Returns: image points above treshold transformed into hough space
+    """
+
+    image = image_with_lines.copy()
+    image[image < treshold] = 0
+    theta_values = np.linspace(-np.pi/2, np.pi/2, n_bins_theta)
+    D = np.sqrt(image.shape[0]**2 + image.shape[1]**2)
+    rho_values = np.linspace(-D, D, n_bins_rho)
+    accumulator = np.zeros((n_bins_rho, n_bins_theta), dtype=np.float64)
+
+    cos_precalculated = np.cos(theta_values)
+    sin_precalculated = np.sin(theta_values)
+
+    indices = np.argwhere(image)
+
+    # Loop through all nonzero pixels above treshold
+    for i in tqdm(range(len(indices)), desc='Hough transform'):
+        y, x = indices[i]
+        angle = (np.mod(gradient_angles[y, x] + np.pi/2, np.pi)) - np.pi/2
+
+        theta = np.digitize(angle, theta_values) - 1
+        rho = np.round(x * cos_precalculated[theta] + y * sin_precalculated[theta]).astype(np.float64)
+        binned_rho = np.digitize(rho, rho_values) - 1  # digitize returns the bin index + 1
+
+        # Add to accumulator
+        accumulator[binned_rho, theta] += gradient_magnitude[y, x]
+
+    return accumulator
+
+def nonmaxima_suppression_box(image: Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]) -> Union[npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    """
+    Accepts: accumulator image (e.g. sinusoids in hough space)
+    Returns: the image with everything but the 3x3-neighbourhood maxima suppressed
+    """
+    nms_image = np.zeros_like(image)
+
+    for y in range(1, image.shape[0]-1):
+        for x in range(1, image.shape[1]-1):
+            neighbours = image[y - 1 : y + 2, x - 1 : x + 2]
+            if image[y, x] >= np.max(neighbours):
+                nms_image[y, x] = image[y, x]
+
+    return nms_image
+
+def retrieve_hough_pairs(original_image: npt.NDArray[np.float64], hough_image: npt.NDArray[np.uint64],
+        treshold: int, n_bins_theta: int, n_bins_rho: int) -> list[tuple[int, int]]:
+    """
+    Accepts: original image, image with sinusoids in hough space, treshold, n_bins theta in h space, n_bins rho in h space
+    Returns: list of (rho, theta) pairs
+    """
+    theta_values = np.linspace(-np.pi/2, np.pi/2, n_bins_theta)
+    D = np.sqrt(original_image.shape[0]**2 + original_image.shape[1]**2)
+    rho_values = np.linspace(-D, D, n_bins_rho)
+
+    hough_image = hough_image.copy()
+    hough_image[hough_image < treshold] = 0
+
+    y_s, x_s = np.nonzero(hough_image)
+
+    pairs = []
+    for i in range(len(x_s)):
+        x = x_s[i]  # theta values
+        y = y_s[i]  # rho values
+
+        theta = theta_values[x]
+        rho = rho_values[y]
+
+        pairs.append((rho, theta))
+    return pairs
+
+def select_best_pairs(image_line_params, n=10):
+    """
+    Accepts: list of pairs of sinusoids
+    Returns: reduced and prioritized list of pairs of sinusoids
+    """
+    image_line_params = np.array(image_line_params)
+    # Sorts just the kth element, so every element before the kth element is lower
+    # than the kth element and every element after it is higher
+    partition = np.argpartition(image_line_params, kth=len(image_line_params) - n - 1, axis=0)[-n:]
+    image_line_params = image_line_params[partition.T[0]]
+    return image_line_params
+
+def get_line_to_plot(rho, theta, h, w):
+    """
+    Accepts: rho, theta, image height h, image width w
+    Returns: line to plot
+    usage example: plt.plot(uz_image.get_line_to_plot(rho, theta, h, w), 'r', linewidth=.7)
+    """
+
+    c = np.cos(theta)
+    s = np.sin(theta)
+
+    xs = []
+    ys = []
+    if s != 0:
+        y = int(rho / s)
+        if 0 <= y < h:
+            xs.append(0)
+            ys.append(y)
+
+        y = int((rho - w * c) / s)
+        if 0 <= y < h:
+            xs.append(w - 1)
+            ys.append(y)
+    if c != 0:
+        x = int(rho / c)
+        if 0 <= x < w:
+            xs.append(x)
+            ys.append(0)
+
+        x = int((rho - h * s) / c)
+        if 0 <= x < w:
+            xs.append(x)
+            ys.append(h - 1)
+
+    return xs[:2], ys[:2]
+def hessian_points(image: Union[npt.NDArray[np.float64],
+        npt.NDArray[np.uint8]], sigma: float,
+        treshold: float) \
+        -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+    """
+    Accepts: image, sigma, treshold
+    Returns: determinant of the Hessian and the detected points
+    """
+    image = image.copy()
+    (ixx, ixy), (iyx, iyy) = derive_image_second_order(image, sigma)
+
+    # Calculate determinant
+    determinant = ixx * iyy - ixy * iyx
+
+    # Apply nonmaxima suppression
+    points = nonmaxima_suppression_box(determinant)
+
+    points = np.argwhere(points > treshold)
+
+    return determinant.astype(np.float64), points.astype(np.float64)
+
+def harris_detector(image: Union[npt.NDArray[np.float64],
+        npt.NDArray[np.uint8]], sigma: float, treshold: float,
+        alpha=0.06) \
+        -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+    """
+    Accepts: image, sigma, treshold
+    Returns: Harris response and the detected points
+    """
+    image = image.copy()
+
+    (ix, iy) = derive_image_first_order(image, sigma)
+
+    ix2 = np.square(ix)
+    iy2 = np.square(iy)
+    ixy = np.multiply(ix, iy)
+
+    # Apply subsequent smoothing with gaussian kernel
+    gauss = np.array([get_gaussian_kernel(sigma * 1.6)])  # 1.6 is empirically chosen
+    ix2 = cv2.filter2D(ix2, cv2.CV_64F, gauss)
+    iy2 = cv2.filter2D(iy2, cv2.CV_64F, gauss)
+    ixy = cv2.filter2D(ixy, cv2.CV_64F, gauss)
+    ix2 = cv2.filter2D(ix2, cv2.CV_64F, gauss.T)
+    iy2 = cv2.filter2D(iy2, cv2.CV_64F, gauss.T)
+    ixy = cv2.filter2D(ixy, cv2.CV_64F, gauss.T)
+
+    determinant = ix2 * iy2 - np.square(ixy)
+    trace = ix2 + iy2
+
+    # Calculate harris response
+    features = determinant - alpha * np.square(trace)
+
+    nms_features = nonmaxima_suppression_box(features)
+    points = np.argwhere(nms_features > treshold)
+
+    return features.astype(np.float64), points.astype(np.float64)
+
+def simple_descriptors(I, Y, X, n_bins=16, radius=40, sigma=2)\
+        -> npt.NDArray[np.float64]:
+    """
+    Computes descriptors for locations given in X and Y.
+
+    I: Image in grayscale.
+    Y: list of Y coordinates of locations. (Y: index of row from top to bottom)
+    X: list of X coordinates of locations. (X: index of column from left to right)
+
+    Returns: tensor of shape (len(X), n_bins^2), so for each point a feature of length n_bins^2.
+    """
+
+    assert np.max(I) <= 1, "Image needs to be in range [0, 1]"
+    assert I.dtype == np.float64, "Image needs to be in np.float64"
+    # Additional assertions for dumb programmers like me
+    assert len(X) == len(Y), "X and Y need to have same length"
+    assert len(X) > 0, "X and Y need to have at least one element"
+
+    g = get_gaussian_kernel(sigma)
+    d = gaussdx(sigma)
+
+    Ix = convolve(I, g.T, d)
+    Iy = convolve(I, g, d.T)
+    Ixx = convolve(Ix, g.T, d)
+    Iyy = convolve(Iy, g, d.T)
+
+    mag = np.sqrt(Ix ** 2 + Iy ** 2)
+    mag = np.floor(mag * ((n_bins - 1) / np.max(mag)))
+
+    feat = Ixx + Iyy
+    feat += abs(np.min(feat))
+    feat = np.floor(feat * ((n_bins - 1) / np.max(feat)))
+
+    desc = []
+
+    for y, x in zip(Y, X):
+        miny = max(y - radius, 0)
+        maxy = min(y + radius, I.shape[0])
+        minx = max(x - radius, 0)
+        maxx = min(x + radius, I.shape[1])
+        miny, maxy, minx, maxx = int(miny), int(maxy), int(minx), int(maxx)  # Convert to int for indexing
+        r1 = mag[miny:maxy, minx:maxx].reshape(-1)
+        r2 = feat[miny:maxy, minx:maxx].reshape(-1)
+
+        a = np.zeros((n_bins, n_bins))
+        for m, l in zip(r1, r2):
+            a[int(m), int(l)] += 1
+
+        a = a.reshape(-1)
+        a /= np.sum(a)
+
+        desc.append(a)
+
+    return np.array(desc)
+
+def find_correspondences(img_a_descriptors: npt.NDArray[np.float64],
+        img_b_descriptors: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """
+    Accepts: img_a_descriptors, img_b_descriptors
+    Returns: correspondence indices
+    """
+    correspondances = []
+
+    # Find img_a correspondences
+    for idx, descriptor_a in enumerate(img_a_descriptors):
+        # Hellinger distance between descriptor_a and every descriptor in image b
+        dists = np.sqrt(0.5 * np.sum(np.square(np.sqrt(descriptor_a) - np.sqrt(img_b_descriptors)), axis=1))
+        min_dist_idx = np.argmin(dists)
+        correspondances.append((idx, min_dist_idx))
+
+    return np.array(correspondances)
+def display_matches(I1, pts1, I2, pts2) \
+        -> None:
+    """
+    Displays matches between images.
+
+    I1, I2: Image in grayscale.
+    pts1, pts2: Nx2 arrays of coordinates of feature points for each image (first column is x, second is y coordinates)
+    """
+
+    assert I1.shape[0] == I2.shape[0] and I1.shape[1] == I2.shape[1], "Images need to be of the same size."
+
+    I = np.hstack((I1, I2))
+    w = I1.shape[1]
+    plt.imshow(I, cmap='gray')
+
+    for p1, p2 in zip(pts1, pts2):
+        x1 = p1[0]
+        y1 = p1[1]
+        x2 = p2[0]
+        y2 = p2[1]
+        plt.plot(x1, y1, 'bo', markersize=3)
+        plt.plot(x2 + w, y2, 'bo', markersize=3)
+        plt.plot([x1, x2 + w], [y1, y2], 'r', linewidth=.8)
+
+    plt.show()
+
+def find_matches(image_a: npt.NDArray[np.float64],
+        image_b: npt.NDArray[np.float64],
+        sigma=3, treshold=1e-6) \
+        -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+    """
+    Finds matches between two images.
+
+    image_a, image_b: Image in grayscale.
+
+    Returns: tuple of two arrays of shape (N, 2) where N is the number of matches.
+    """
+
+    # Get the keypoints
+    _, image_a_keypoints = harris_detector(image_a, sigma=sigma, treshold=treshold)
+    _, image_b_keypoints = harris_detector(image_b, sigma=sigma, treshold=treshold)
+
+    print("[+] Keypoints detected")
+
+    # Get the descriptors
+    image_a_descriptors = simple_descriptors(image_a, image_a_keypoints[:, 0], image_a_keypoints[:, 1])
+    image_b_descriptors = simple_descriptors(image_b, image_b_keypoints[:, 0], image_b_keypoints[:, 1])
+
+    print("[+] Descriptors computed")
+
+    # Find correspondences
+    correspondences_a = find_correspondences(image_a_descriptors, image_b_descriptors)
+    correspondences_b = find_correspondences(image_b_descriptors, image_a_descriptors)
+
+    print("[+] Correspondences found")
+
+    def select_best_correspondences(c_a, d_a, c_b, d_b):
+        # Find correspondences that map to the same index a->b
+        unique, counts = np.unique(c_a[:, 1], return_counts=True)
+        # Find all b's
+        duplicates = unique[counts > 1]
+        for duplicate in duplicates:
+            # Find the indices of the duplicates, i.e. find all a's
+            indices = np.where(c_a[:, 1] == duplicate)[0]
+            # Extract those from the descriptors
+            candidates = d_a[indices]
+            # Extract comparing distance from b's descriptors
+            comparing_distance = d_b[duplicate]
+            # Find the closest one using Hellinger's distance
+            dists = np.sqrt(0.5 * np.sum(np.square(np.sqrt(comparing_distance) - np.sqrt(candidates)), axis=1))
+            # Select the best one
+            min_dist_idx = np.argmin(dists)
+            # and map that candidate back to the indices index
+            candidate = indices[min_dist_idx]
+            candidate = c_a[candidate]
+            #if np.flip(candidate) in c_b:
+            #    # Remove it from the indices so it does not get removed in the next step
+            #    indices = np.delete(indices, min_dist_idx)
+            #else:
+            #    print(f'[i] select_best_correspondences -> Not reciprocal {np.flip(candidate)}')
+            # Remove the remaining ones from the correspondences
+            c_a = np.delete(c_a, indices, axis=0)
+
+        return c_a
+
+    correspondences_a = select_best_correspondences(correspondences_a, image_a_descriptors, correspondences_b, image_b_descriptors)
+    correspondences_b = select_best_correspondences(correspondences_b, image_b_descriptors, correspondences_a, image_a_descriptors)
+
+    print("[+] Correspondences filtered")
+
+    def check_reciprocity(c_a, c_b):
+        for _, match in enumerate(c_a):
+            if np.flip(match) not in c_b:
+                print(f'[i] check_reciprocity -> Not reciprocal {match}')
+                index = np.where((c_a == match).all(axis=1))[0][0]
+                c_a = np.delete(c_a, index, axis=0)
+        return c_a
+
+    correspondences_a = check_reciprocity(correspondences_a, correspondences_b)
+    correspondences_b = check_reciprocity(correspondences_b, correspondences_a)
+
+    print("[+] Correspondences reciprocated")
+
+    # Now map correspondences to the keypoints
+    def map_indexes_to_points(a_points, b_points, corrs_a, corrs_b, img_a_k, img_b_k):
+        for correspondence in corrs_a:
+            ix = np.argwhere(correspondence[0] == corrs_b[:, 1])
+            if ix.size > 0:
+                a_points.append(np.flip(img_a_k[correspondence[0]]))
+                b_points.append(np.flip(img_b_k[correspondence[1]]))
+
+    image_a_points = []
+    image_b_points = []
+
+    map_indexes_to_points(image_a_points, image_b_points, correspondences_a, correspondences_b, image_a_keypoints, image_b_keypoints)
+    map_indexes_to_points(image_b_points, image_a_points, correspondences_b, correspondences_a, image_b_keypoints, image_a_keypoints)
+    print("[+] Correspondences mapped to points")
+
+    return np.array(image_a_points), np.array(image_b_points)
+
+
+def estimate_homography(keypoints: npt.NDArray[np.float64]) \
+        -> npt.NDArray[np.float64]:
+    """
+    Accepts a set of keypoints (each row is x_r, y_r, x_t, y_t) and returns the homography matrix.
+    Each correspondence contributes two rows to A:
+        [x_r  y_r  1  0    0    0  -x_t*x_r  -x_t*y_r  -x_t]
+        [0    0    0  x_r  y_r  1  -y_t*x_r  -y_t*y_r  -y_t]
+    """
+
+    # Construct the A matrix
+    A = np.zeros((2 * keypoints.shape[0], 9))
+
+    for ix, pair in enumerate(keypoints):
+        x_r, y_r, x_t, y_t = pair
+        A[ix*2] = [x_r, y_r, 1, 0, 0, 0, -x_t*x_r, -x_t*y_r, -x_t]
+        A[ix*2+1] = [0, 0, 0, x_r, y_r, 1, -y_t*x_r, -y_t*y_r, -y_t]
+
+    # Perform SVD
+    _, _, V = np.linalg.svd(A)
+    # Compute vector h
+    h = V[-1] / V[-1][-1]
+    # Reshape to 3x3
+    H = h.reshape(3, 3)
+
+    return H
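A quick DLT sanity check (not part of the patch): four correspondences of a pure translation by (10, 5) should recover the translation matrix almost exactly.

import numpy as np
pts = np.array([
    [0, 0, 10, 5],
    [1, 0, 11, 5],
    [0, 1, 10, 6],
    [1, 1, 11, 6],
], dtype=np.float64)
H = estimate_homography(pts)
expected = np.array([[1, 0, 10], [0, 1, 5], [0, 0, 1]], dtype=np.float64)
assert np.allclose(H, expected, atol=1e-6)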
+ """ + # Find the best homography + best_inliers = [] + best_homography = [] + + for i in tqdm(range(iterations), desc='RANSAC'): + # Randomly sample 4 correspondences + sample = np.random.choice(correspondences_a.shape[0], 4, replace=False) + # correspondance_a[0] (x,y) -> correspondance_b[0] (x,y) + sample_a = correspondences_a[sample] + sample_b = correspondences_b[sample] + # Make it (x,y) -> (x,y) = x,y,x,y matrix + keypoints = np.concatenate((sample_a, sample_b), axis=1) + # Estimate homography + homography = estimate_homography(keypoints) + # Compute the inliers + inlier_indices = [] + # Calculate the distance between the transformed points and the actual points + # and count the number of inliers + for i, correspondence in enumerate(zip(correspondences_a, correspondences_b)): + (x_r, y_r), (x_t, y_t) = correspondence + res = np.dot(homography, [x_r, y_r, 1]) + # Make sure that res has last element 1 + res = res / res[-1] + x_t_, y_t_ = res[:2] + # Compute the distance + distance = np.sqrt((x_t - x_t_)**2 + (y_t - y_t_)**2) + # Check if it is an inlier + if distance < threshold: + inlier_indices.append(i) + + inliers_proportion = len(inlier_indices)/len(correspondences_a) + + # Check if we have a new best homography + if inliers_proportion > 0.2: + homography_2 = estimate_homography(np.concatenate((correspondences_a[inlier_indices], correspondences_b[inlier_indices]), axis=1)) + inlier_indices_2 = [] + for i, correspondence in enumerate(zip(correspondences_a, correspondences_b)): + (x_r, y_r), (x_t, y_t) = correspondence + res = np.dot(homography_2, [x_r, y_r, 1]) + # Make sure that res has last element 1 + res = res / res[-1] + x_t_, y_t_ = res[:2] + # Compute the distance + distance = np.sqrt((x_t - x_t_)**2 + (y_t - y_t_)**2) + # Check if it is an inlier + if distance < threshold: + inlier_indices_2.append(i) + + if len(inlier_indices_2) / len(correspondences_a) > len(best_inliers) / len(correspondences_a): + best_inliers = inlier_indices_2 + best_homography = homography_2 + + + best_keypoints = np.concatenate((correspondences_a[best_inliers], correspondences_b[best_inliers]), axis=1) + + if best_homography == []: + raise Exception("Ransac did not converge") + + return best_homography.astype(np.float64), best_keypoints + + +def sift(grayscale_image: npt.NDArray[np.float64], + plot=False, get_descriptors=False) \ + -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: + """ + SIFT algorithm for finding keypoints and descriptors. + """ + grayscale_image = grayscale_image.copy() + + def number_of_ocatves(image): + """ + Calculate the number of octaves. + """ + return int(np.log2(min(image.shape))) + + # Firstly downcale the image three times + different_scale_images = [] + different_scale_images.append(grayscale_image) + downsample_size = number_of_ocatves(grayscale_image) + for i in range(downsample_size): + different_scale_images.append(cv2.pyrDown(different_scale_images[-1])) + + def generateGaussianKernels(sigma, num_intervals): + """ + Generate list of gaussian kernels at which to blur the input image. + Default values of sigma, intervals, and octaves follow section 3 of Lowe's paper. + (I stole this from the internet) + """ + num_images_per_octave = num_intervals + 3 + k = 2 ** (1. 
+def sift(grayscale_image: npt.NDArray[np.float64],
+         plot=False, get_descriptors=False) \
+        -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+    """
+    Simplified SIFT algorithm for finding keypoints and descriptors.
+    """
+    grayscale_image = grayscale_image.copy()
+
+    def number_of_octaves(image):
+        """
+        Calculate the number of octaves that fit in the image.
+        """
+        return int(np.log2(min(image.shape)))
+
+    # First build the image pyramid by repeatedly halving the image
+    different_scale_images = [grayscale_image]
+    downsample_size = number_of_octaves(grayscale_image)
+    for _ in range(downsample_size):
+        different_scale_images.append(cv2.pyrDown(different_scale_images[-1]))
+
+    def generateGaussianKernels(sigma, num_intervals):
+        """
+        Generate the list of Gaussian sigmas with which to incrementally blur
+        the input image. Default values of sigma, intervals, and octaves follow
+        section 3 of Lowe's paper. (Adapted from an implementation found online.)
+        """
+        num_images_per_octave = num_intervals + 3
+        k = 2 ** (1. / num_intervals)
+        # Sigma of the incremental blur needed to go from one scale to the next within an octave
+        gaussian_kernels = np.zeros(num_images_per_octave)
+        gaussian_kernels[0] = sigma
+
+        for image_index in range(1, num_images_per_octave):
+            sigma_previous = (k ** (image_index - 1)) * sigma
+            sigma_total = k * sigma_previous
+            gaussian_kernels[image_index] = np.sqrt(sigma_total ** 2 - sigma_previous ** 2)
+        return gaussian_kernels
+
+    # Blur the images at each pyramid scale with the incremental Gaussian kernels
+    downsampled_and_blurred_images = []
+    gauss_kernels = generateGaussianKernels(1.6, downsample_size)
+    for image in different_scale_images:
+        images = [image]  # Do not blur the first image
+        for kernel in gauss_kernels[1:]:
+            images.append(gaussfilter2D(images[-1], kernel))
+        downsampled_and_blurred_images.append(np.array(images))
+
+    # Plot all downsampled and blurred images
+    if plot:
+        fig, axs = plt.subplots(len(downsampled_and_blurred_images), len(downsampled_and_blurred_images[0]), figsize=(20, 20))
+        fig.suptitle('Downsampled and blurred images')
+        for i in range(len(downsampled_and_blurred_images)):
+            for j in range(len(downsampled_and_blurred_images[i])):
+                axs[i, j].imshow(downsampled_and_blurred_images[i][j], cmap='gray')
+        plt.show()
+
+    # Compute the difference of Gaussians between consecutive blur levels
+    DOG = [[] for _ in range(len(downsampled_and_blurred_images))]
+    for i in range(len(downsampled_and_blurred_images)):
+        for j in range(len(downsampled_and_blurred_images[i]) - 1):
+            DOG[i].append(np.array(downsampled_and_blurred_images[i][j+1] - downsampled_and_blurred_images[i][j]))
+
+    # Plot the difference of Gaussians
+    if plot:
+        fig, axs = plt.subplots(len(DOG), len(DOG[0]), figsize=(20, 20))
+        fig.suptitle('Difference of Gaussians')
+        for i in range(len(DOG)):
+            for j in range(len(DOG[i])):
+                axs[i, j].imshow(DOG[i][j], cmap='gray')
+        plt.show()
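+    # Worked example of the incremental blur (illustrative): with sigma = 1.6
+    # and num_intervals = 2, k = 2**(1/2), so going from scale 1.6 to k*1.6
+    # takes an incremental kernel of sqrt((k*1.6)**2 - 1.6**2) = 1.6, because
+    # Gaussian variances add under repeated convolution.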
+ """ + compare_pixel = image_part[1, 1] + if np.abs(compare_pixel) < 0.01: + return False + if compare_pixel > 0: + if np.all(compare_pixel >= image_prev_part) and \ + np.all(compare_pixel >= image_part[0,:]) and \ + np.all(compare_pixel >= image_part[2,:]) and \ + np.all(compare_pixel >= image_next_part): + return True # Local maximum + else: + if np.all(compare_pixel <= image_prev_part) and \ + np.all(compare_pixel <= image_part) and \ + np.all(compare_pixel <= image_part) and \ + np.all(compare_pixel <= image_next_part): + return True # Local minimum + return False + + ############################## + # Find keypoints + ############################## + keypoints = [] + # Go throgh all image scales per octave + for i in tqdm(range(len(DOG)), desc='Finding SIFT keypoints'): + per_octave_images = DOG[i] # Retrive all images per octave + for j in range(1, len(per_octave_images)-1): + # Go through all images per octave by 3 + image_prev, image, image_next = per_octave_images[j-1], per_octave_images[j], per_octave_images[j+1] + for y in range(8, image.shape[0]-8): + for x in range(8, image.shape[1]-8): + # Check if the pixel is a local extremum + if is_extremum(image_prev[y-1:y+2, x-1:x+2], image[y-1:y+2, x-1:x+2], image_next[y-1:y+2, x-1:x+2]): + # If keypoint is a good local extremum, add it to the list + keypoints.append((y, x, i, j)) # (y, x, octave, image scale) + ############################## + + # Compute descriptors + def compute_descriptors(keypoints): + """ + Compute the descriptors for the given keypoints. + """ + descriptors = [] + + for keypoint in keypoints: + y, x = keypoint[:2] + octave, img_ix = keypoint[2:] + # Get the image + image = downsampled_and_blurred_images[octave][img_ix] + + def compute_mag_and_angle(image, y, x): + # Compute the gradient magnitude and orientation at the point + dx = image[y+1, x] - image[y-1, x] + dy = image[y, x+1] - image[y, x-1] + magnitude = np.sqrt(dx**2 + dy**2) + orientation = np.arctan2(dy, dx) + return magnitude, orientation + _, orientation = compute_mag_and_angle(image, y, x) + + if orientation < 0: + orientation %= np.pi + + def create_indexes(y, x, x_offset, y_offset): + # Create indexes for the given offsets + indexes = [] + for i in range(x_offset): + for j in range(y_offset): + indexes.append((y+j, x+i)) + return np.array(indexes) + + # Map orientation to which direction to look at + if orientation < np.pi/8: # Right + # Get indexes + indexes = create_indexes(y, x, 8, 8) + elif orientation < 3*np.pi/8: # Right up + indexes = create_indexes(y-8, x, 8, 8) + elif orientation < 5*np.pi/8: # Up + indexes = create_indexes(y-4, x-8, 8, 8) + elif orientation < 7*np.pi/8: # Left up + indexes = create_indexes(y-8, x-8, 8, 8) + else: # Left + indexes = create_indexes(y-4, x-8, 8, 8) + + # Split indexes into 4 blocks + blocks = np.array_split(indexes, 4) + + histogram_space = np.linspace(0, np.pi, 8) + all_histograms = [] + for block in blocks: + # Split each block in 4x4 cells and compute histogram for each cell + cells = np.array_split(block, 4) + histograms = [] + for cell in cells: + histogram = np.zeros(8) + for y, x in cell: + magnitude, orientation = compute_mag_and_angle(image,y, x) + # Compute the histogram + if orientation < 0: + orientation %= np.pi + # Find the bin + bin = np.digitize(orientation, histogram_space) + histogram[bin - 1] += magnitude + histograms.append(histogram) + # Concatenate the histograms + histograms = np.concatenate(histograms) + # Smooth the histogram + all_histograms.append(histograms/np.sum(histograms)) + 
+    if get_descriptors:
+        descriptors = compute_descriptors(keypoints)
+    else:
+        descriptors = []
+
+    # Rescale the keypoints back to the original image size; octave o was
+    # downsampled by a factor of 2**o
+    keypoints = np.array(keypoints)
+    for keypoint in keypoints:
+        if keypoint[2] > 0:
+            keypoint[0] *= 2 ** keypoint[2]
+            keypoint[1] *= 2 ** keypoint[2]
+
+    # Drop the octave and scale columns, keeping only (y, x)
+    keypoints = keypoints[:, :-2]
+
+    return np.array(keypoints), np.array(descriptors)
diff --git a/assignment5/uz_framework/text.py b/assignment5/uz_framework/text.py
new file mode 100644
index 0000000..08ad835
--- /dev/null
+++ b/assignment5/uz_framework/text.py
@@ -0,0 +1,9 @@
+import numpy as np
+
+def read_data(filename: str):
+    # Reads a whitespace-separated numpy array from a text file
+    with open(filename) as f:
+        s = f.read()
+
+    return np.fromstring(s, sep=' ')
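+# Illustrative usage (the file name is an assumption): the epipolar data files
+# are whitespace-separated number lists; read_data returns them as a flat
+# float array that the caller reshapes, e.g.
+#
+#   points = read_data('data/epipolar/house_points.txt').reshape(-1, 4)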