#%%cython --a --compile-args=/openmp
import numpy as np
cimport numpy as np
cimport cython
from libc.math cimport exp
from cython.parallel import parallel, prange
@cython.boundscheck(False)
@cython.wraparound(False)
def nlm(np.ndarray[np.float64_t, ndim=2] array,int N,int K,float sigma):
cdef:
Py_ssize_t height = array.shape[0]
Py_ssize_t width = array.shape[1]
int pad_len = N+K
int pad_H = pad_len+height
int pad_W = pad_len+width
double value = 2*sigma*sigma
Py_ssize_t ny,nx,ky,kx
Py_ssize_t h=0, w=0, h1=0, w1=0, h2=0, w2=0, h3=0, w3=0
arraypad = np.zeros((pad_H+pad_len,pad_W+pad_len),dtype=np.float64)
arraypad[pad_len:pad_H,pad_len:pad_W] = array
Y_array = np.zeros((height,width),dtype=np.float64)
X_array = np.zeros((height,width),dtype=np.float64)
ex = np.zeros((height,width),dtype=np.float64)
yy = np.zeros((height,width),dtype=np.float64)
B = np.zeros((height,width),dtype=np.float64)
ssd = np.zeros((height,width),dtype=np.float64)
ppd = np.zeros((height,width),dtype=np.float64)
cdef:
double [:,::1] pad_view = arraypad
double [:,::1] Y_view = Y_array
double [:,::1] X_view = X_array
double [:,::1] yy_view = yy
double [:,::1] ex_view = ex
double [:,::1] B_view = B
double [:,::1] ppd_view = ppd
double [:,::1] ssd_view = ssd
with nogil,parallel():
for ny in prange(-N,N+1):
for nx in range(-N,N+1):
ssd_view[:,::1] = 0
for ky in range(-K,K+1):
for kx in range(-K, K + 1):
Y_view[:,::1] = pad_view[pad_len+ky+ny:pad_H+ky+ny,pad_len+kx+nx:pad_W+kx+nx]
X_view[:,::1] = pad_view[pad_len+ky: pad_H+ky, pad_len+kx: pad_W+kx]
for h in range(height):
for w in range(width):
ssd_view[h,w] +=(Y_view[h,w] - X_view[h,w])*(Y_view[h,w] - X_view[h,w])
for h1 in range(height):
for w1 in range(width):
ex_view[h1,w1] = exp(-(ssd_view[h1,w1]/value))
for h2 in range(height):
for w2 in range(width):
B_view[h2,w2] += ex_view[h2,w2]
ppd_view[:,::1] = pad_view[pad_len+ny:pad_H+ny,pad_len+nx:pad_W+nx]
for h3 in range(height):
for w3 in range(width):
yy_view[h3,w3] += ex_view[h3,w3] * ppd_view[h3,w3]
return yy/B