diff options
author | Soumith Chintala <soumith@gmail.com> | 2015-10-18 22:57:40 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@gmail.com> | 2015-10-18 22:57:40 +0300 |
commit | 5fa8bc0a4eaaa77806a12f6631124a320b4907c2 (patch) | |
tree | c37388f09994288981a3f57030d44fc2ed94f457 | |
parent | 074806a63b637b1c7fe5a65aa9e227fb658c92ea (diff) | |
parent | 93cc9d1c30155238d8223df53bffab9b2c311855 (diff) |
Merge branch 'master' of github.com:torch/imagequiet
-rw-r--r-- | CMakeLists.txt | 41 | ||||
-rw-r--r-- | README.md | 465 | ||||
-rw-r--r-- | doc/colorspace.md | 67 | ||||
-rw-r--r-- | doc/gui.md | 53 | ||||
-rw-r--r-- | doc/index.md | 34 | ||||
-rw-r--r-- | doc/paramtransform.md | 73 | ||||
-rw-r--r-- | doc/saveload.md | 52 | ||||
-rw-r--r-- | doc/simpletransform.md | 111 | ||||
-rw-r--r-- | doc/tensorconstruct.md | 91 | ||||
-rwxr-xr-x | generic/image.c | 502 | ||||
-rwxr-xr-x | generic/png.c | 67 | ||||
-rw-r--r-- | image-1.1.alpha-0.rockspec | 2 | ||||
-rw-r--r-- | init.lua | 215 | ||||
-rw-r--r-- | mkdocs.yml | 14 | ||||
-rw-r--r-- | png.c | 43 | ||||
-rw-r--r-- | test/corrupt-ihdr.png | bin | 0 -> 275 bytes | |||
-rwxr-xr-x | test/test_decompress_jpg.lua | 6 | ||||
-rw-r--r-- | test/test_png.lua | 28 | ||||
-rw-r--r-- | test/test_scale.lua | 51 | ||||
-rw-r--r-- | test/test_warp.lua | 6 |
20 files changed, 1177 insertions, 744 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 57ae2fe..0086cb1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,15 +5,50 @@ FIND_PACKAGE(Torch REQUIRED) FIND_PACKAGE(JPEG) FIND_PACKAGE(PNG) +# OpenMP support? +SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?") +IF (APPLE AND CMAKE_COMPILER_IS_GNUCC) + EXEC_PROGRAM (uname ARGS -v OUTPUT_VARIABLE DARWIN_VERSION) + STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION}) + MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}") + IF (DARWIN_VERSION GREATER 9) + SET(APPLE_OPENMP_SUCKS 1) + ENDIF (DARWIN_VERSION GREATER 9) + EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion + OUTPUT_VARIABLE GCC_VERSION) + IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2) + MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)") + MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unknown-pragmas") + SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" 
FORCE) + ENDIF () +ENDIF () + +IF (WITH_OPENMP) + FIND_PACKAGE(OpenMP) + IF(OPENMP_FOUND) + MESSAGE(STATUS "Compiling with OpenMP support") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + ENDIF(OPENMP_FOUND) +ENDIF (WITH_OPENMP) + SET(src ppm.c) ADD_TORCH_PACKAGE(ppm "${src}" "${luasrc}" "Image Processing") TARGET_LINK_LIBRARIES(ppm luaT TH) +IF(LUALIB) + TARGET_LINK_LIBRARIES(ppm ${LUALIB}) +ENDIF() if (JPEG_FOUND) SET(src jpeg.c) include_directories (${JPEG_INCLUDE_DIR}) ADD_TORCH_PACKAGE(jpeg "${src}" "${luasrc}" "Image Processing") TARGET_LINK_LIBRARIES(jpeg luaT TH ${JPEG_LIBRARIES}) + IF(LUALIB) + TARGET_LINK_LIBRARIES(jpeg ${LUALIB}) + ENDIF() else (JPEG_FOUND) message ("WARNING: Could not find JPEG libraries, JPEG wrapper will not be installed") endif (JPEG_FOUND) @@ -23,6 +58,9 @@ if (PNG_FOUND) include_directories (${PNG_INCLUDE_DIR}) ADD_TORCH_PACKAGE(png "${src}" "${luasrc}" "Image Processing") TARGET_LINK_LIBRARIES(png luaT TH ${PNG_LIBRARIES}) + IF(LUALIB) + TARGET_LINK_LIBRARIES(png ${LUALIB}) + ENDIF() else (PNG_FOUND) message ("WARNING: Could not find PNG libraries, PNG wrapper will not be installed") endif (PNG_FOUND) @@ -32,4 +70,7 @@ SET(luasrc init.lua fabio.jpg fabio.png lena.jpg lena.png win.ui) ADD_TORCH_PACKAGE(image "${src}" "${luasrc}" "Image Processing") TARGET_LINK_LIBRARIES(image luaT TH) +IF(LUALIB) + TARGET_LINK_LIBRARIES(image ${LUALIB}) +ENDIF() INSTALL(FILES "README.md" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/image") @@ -1,466 +1,31 @@ # image Package Reference Manual # + __image__ is the [Torch7 distribution](http://torch.ch/) package for processing images. 
It contains a wide variety of functions divided into the following categories: - * [Saving and loading](#image.saveload) images as JPEG, PNG, PPM and PGM; - * [Simple transformations](#image.simpletrans) like translation, scaling and rotation; - * [Parameterized transformations](#image.paramtrans) like convolutions and warping; - * [Graphical user interfaces](#image.grapicalinter) like display and window; - * [Color Space Conversions](#image.colorspace) from and to RGB, YUV, Lab, and HSL; - * [Tensor Constructors](#image.tensorconst) for creating Lenna, Fabio and Gaussian and Laplacian kernels; + + * [Saving and loading](doc/saveload.md) images as JPEG, PNG, PPM and PGM; + * [Simple transformations](doc/simpletransform.md) like translation, scaling and rotation; + * [Parameterized transformations](doc/paramtransform.md) like convolutions and warping; + * [Graphical user interfaces](doc/gui.md) like display and window; + * [Color Space Conversions](doc/colorspace.md) from and to RGB, YUV, Lab, and HSL; + * [Tensor Constructors](doc/tensorconstruct.md) for creating Lenna, Fabio and Gaussian and Laplacian kernels; Note that unless speficied otherwise, this package deals with images of size `nChannel x height x width`. -<a name="image.saveload"/> -## Saving and Loading ## -This sections includes functions for saving and loading different types -of images to and from disk. - -<a name="image.load"/> -### [res] image.load(filename, [depth, tensortype]) ### -Loads an image located at path `filename` having `depth` channels (1 or 3) -into a [Tensor](https://github.com/torch/torch7/blob/master/doc/tensor.md#tensor) -of type `tensortype` (*float*, *double* or *byte*). The last two arguments -are optional. - -The image format is determined from the `filename`'s -extension suffix. Supported formats are -[JPEG](https://en.wikipedia.org/wiki/JPEG), -[PNG](https://en.wikipedia.org/wiki/Portable_Network_Graphics), -[PPM and PGM](https://en.wikipedia.org/wiki/Netpbm_format). 
- -The returned `res` Tensor has size `nChannel x height x width` where `nChannel` is -1 (greyscale) or 3 (usually [RGB](https://en.wikipedia.org/wiki/RGB_color_model) -or [YUV](https://en.wikipedia.org/wiki/YUV). - -<a name="image.save"/> -### image.save(filename, tensor) ### -Saves Tensor `tensor` to disk at path `filename`. The format to which -the image is saved is extrapolated from the `filename`'s extension suffix. -The `tensor` should be of size `nChannel x height x width`. - -<a name="image.decompressJPG"/> -### [res] image.decompressJPG(tensor, [depth, tensortype]) ### -Decompresses an image from a ByteTensor in memory having `depth` channels (1 or 3) -into a [Tensor](https://github.com/torch/torch7/blob/master/doc/tensor.md#tensor) -of type `tensortype` (*float*, *double* or *byte*). The last two arguments -are optional. - -Usage: -```lua -local fin = torch.DiskFile(imfile, 'r') -fin:binary() -fin:seekEnd() -local file_size_bytes = fin:position() - 1 -fin:seek(1) -local img_binary = torch.ByteTensor(file_size_bytes) -fin:readByte(img_binary:storage()) -fin:close() --- Then when you're ready to decompress the ByteTensor: -im = image.decompressJPG(img_binary) -``` - -<a name="image.compressJPG"/> -### [res] image.compressJPG(tensor, [quality]) ### -Compresses an image to a ByteTensor in memory. Optional quality is between 1 and 100 and adjusts compression quality. - -<a name="image.simpletrans"/> -## Simple Transformations ## -This section includes simple but very common image transformations -like cropping, translation, scaling and rotation. - -<a name="image.crop"/> -### [res] image.crop([dst,] src, x1, y1, [x2, y2]) ### -Crops image `src` at coordinate `(x1, y1)` up to coordinate -`(x2, y2)`. If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. - -<a name="image.translate"/> -### [res] image.translate([dst,] src, x, y) ### -Translates image `src` by `x` pixels horizontally and `y` pixels -vertically. 
If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. - -<a name="image.scale"/> -### [res] image.scale(src, width, height, [mode]) ### -Rescale the height and width of image `src` to have -width `width` and height `height`. Variable `mode` specifies -type of interpolation to be used. Valid values include -[bilinear](https://en.wikipedia.org/wiki/Bilinear_interpolation) -(the default) or *simple* interpolation. Returns a new `res` Tensor. - -### [res] image.scale(src, size, [mode]) ### -Rescale the height and width of image `src`. -Variable `size` is a number or a string specifying the -size of the result image. When `size` is a number, it specifies the -maximum height or width of the output. When it is a string like -*WxH* or *MAX* or *^MIN*, it specifies the `height x width`, maximum, or minimum height or -width of the output, respectively. - -### [res] image.scale(dst, src, [mode]) ### -Rescale the height and width of image `src` to fit the dimensions of -Tensor `dst`. - -<a name="image.rotate"/> -### [res] image.rotate([dst,], src, theta, [mode]) ### -Rotates image `src` by `theta` radians. -If `dst` is specified it is used to store the results of the rotation. -Variable `mode` specifies type of interpolation to be used. Valid values include -*simple* (the default) or *bilinear* interpolation. - -<a name="image.polar"/> -### [res] image.polar([dst,], src, [interpolation], [mode]) ### -Converts image `src` to polar coordinates. In the polar image, angular information is in the vertical direction and radius information in the horizontal direction. -If `dst` is specified it is used to store the polar image. If `dst` is not specified, its size is automatically determined. Variable `interpolation` specifies type of interpolation to be used. Valid values include *simple* (the default) or *bilinear* interpolation. 
Variable `mode` determines whether the *full* image is converted to the polar space (implying empty regions in the polar image), or whether only the *valid* central part of the polar transform is returned (the default). - -<a name="image.logpolar"/> -### [res] image.logpolar([dst,], src, [interpolation], [mode]) ### -Converts image `src` to log-polar coordinates. In the log-polar image, angular information is in the vertical direction and log-radius information in the horizontal direction. -If `dst` is specified it is used to store the polar image. If `dst` is not specified, its size is automatically determined. Variable `interpolation` specifies type of interpolation to be used. Valid values include *simple* (the default) or *bilinear* interpolation. Variable `mode` determines whether the *full* image is converted to the log-polar space (implying empty regions in the log-polar image), or whether only the *valid* central part of the log-polar transform is returned (the default). - -<a name="image.hflip"/> -### [res] image.hflip([dst,] src) ### -Flips image `src` horizontally (left<->right). If `dst` is provided, it is used to -store the output image. Otherwise, returns a new `res` Tensor. - -<a name="image.vflip"/> -### [res] image.vflip([dst,], src) ### -Flips image `src` vertically (upsize<->down). If `dst` is provided, it is used to -store the output image. Otherwise, returns a new `res` Tensor. - -<a name="image.flip"/> -### [res] image.flip([dst,] src, flip_dim) ### -Flips image `src` along the specified dimension. If `dst` is provided, it is used to -store the output image. Otherwise, returns a new `res` Tensor. - -<a name="image.minmax"/> -### [res] image.minmax{tensor, [min, max, ...]} ### -Compresses image `tensor` between `min` and `max`. -When omitted, `min` and `max` are infered from -`tensor:min()` and `tensor:max()`, respectively. 
-The `tensor` is normalized using `min` and `max` by performing : -```lua -tensor:add(-min):div(max-min) -``` -Other optional arguments (`...`) include `symm`, `inplace`, `saturate`, and `tensorOut`. -When `symm=true` and `min` and `max` are both omitted, -`max = min*2` in the above equation. This results in a symmetric dynamic -range that is particularly useful for drawing filters. The default is `false`. -When `inplace=true`, the result of the compression is stored in `tensor`. -The default is `false`. -When `saturate=true`, the result of the compression is passed through -[image.saturate](#image.saturate) -When provided, Tensor `tensorOut` is used to store results. -Note that arguments should be provided as key-value pairs (in a table). - -<a name="image.gaussianpyramid"/> -### [res] image.gaussianpyramid([dst,] src, scales) ### -Constructs a [Gaussian pyramid](https://en.wikipedia.org/wiki/Gaussian_pyramid) -of scales `scales` from a 2D or 3D `src` image or size -`[nChannel x] width x height`. Each Tensor at index `i` -in the returned list of Tensors has size `[nChannel x] width*scales[i] x height*scales[i]`. - -If list `dst` is provided, with or without Tensors, it is used to store the output images. -Otherwise, returns a new `res` list of Tensors. - -Internally, this function makes use of functions [image.gaussian](#image.gaussian), -[image.scale](#image.scale) and [image.convolve](#image.convolve). - -<a name="image.paramtrans"/> -## Parameterized transformations ## -This section includes functions for performing transformations on -images requiring parameter Tensors like a warp `field` or a convolution -`kernel`. - -<a name="image.warp"/> -### [res] image.warp([dst,]src,field,[mode,offset,clamp]) ### -Warps image `src` (of size`KxHxW`) -according to flow field `field`. The latter has size `2xHxW` where the -first dimension is for the `(y,x)` flow field. 
String `mode` can -take on values [lanczos](https://en.wikipedia.org/wiki/Lanczos_resampling), -[bicubic](https://en.wikipedia.org/wiki/Bicubic_interpolation), -[bilinear](https://en.wikipedia.org/wiki/Bilinear_interpolation) (the default), -or *simple*. When `offset` is true (the default), `(x,y)` is added to the flow field. -The `clamp` variable specifies how to handle the interpolation of samples off the input image. -Permitted values are strings *clamp* (the default) or *pad*. -If `dst` is specified, it is used to store the result of the warp. -Otherwise, returns a new `res` Tensor. - -<a name="image.convolve"/> -### [res] image.convolve([dst,] src, kernel, [mode]) ### -Convolves Tensor `kernel` over image `src`. Valid string values for argument -`mode` are : - * *full* : the `src` image is effectively zero-padded such that the `res` of the convolution has the same size as `src`; - * *valid* (the default) : the `res` image will have `math.ceil(kernel/2)` less columns and rows on each side; - * *same* : performs a *full* convolution, but crops out the portion fitting the output size of *valid*; -Note that this function internally uses -[torch.conv2](https://github.com/torch/torch7/blob/master/doc/maths.md#torch.conv.dok). -If `dst` is provided, it is used to store the output image. -Otherwise, returns a new `res` Tensor. - -<a name="image.lcn"/> -### [res] image.lcn(src, [kernel]) ### -Local contrast normalization (LCN) on a given `src` image using kernel `kernel`. -If `kernel` is not given, then a default `9x9` Gaussian is used -(see [image.gaussian](#image.gaussian)). - -To prevent border effects, the image is first global contrast normalized -(GCN) by substracting the global mean and dividing by the global -standard deviation. - -Then the image is locally contrast normalized using the following equation: -```lua -res = (src - lm(src)) / sqrt( lm(src) - lm(src*src) ) -``` -where `lm(x)` is the local mean of each pixel in the image (i.e. 
-`image.convolve(x,kernel)`) and `sqrt(x)` is the element-wise -square root of `x`. In other words, LCN performs -local substractive and divisive normalization. - -Note that this implementation is different than the LCN Layer defined on page 3 of -[What is the Best Multi-Stage Architecture for Object Recognition?](http://yann.lecun.com/exdb/publis/pdf/jarrett-iccv-09.pdf). - -<a name="image.erode"/> -### [res] image.erode(src, [kernel, pad]) ### -Performs a [morphological erosion](https://en.wikipedia.org/wiki/Erosion_(morphology)) -on binary (zeros and ones) image `src` using odd -dimensioned morphological binary kernel `kernel`. -The default is a kernel consisting of ones of size `3x3`. Number -`pad` is the value to assume outside the image boundary when performing -the convolution. The default is 1. - -<a name="image.dilate"/> -### [res] image.dilate(src, [kernel, pad]) ### -Performs a [morphological dilation](https://en.wikipedia.org/wiki/Dilation_(morphology)) -on binary (zeros and ones) image `src` using odd -dimensioned morphological binary kernel `kernel`. -The default is a kernel consisting of ones of size `3x3`. Number -`pad` is the value to assume outside the image boundary when performing -the convolution. The default is 0. +## Install -<a name="image.grapicalinter"/> -## Graphical User Interfaces ## -The following functions, except for [image.toDisplayTensor](#image.toDisplayTensor), -require package [qtlua](https://github.com/torch/qtlua) and can only be -accessed via the `qlua` Lua interpreter (as opposed to the -[th](https://github.com/torch/trepl) or luajit interpreter). +The easiest way to install this package is by following the [instructions](http://torch.ch/docs/getting-started.html) +to install [Torch7](http://torch.ch/), which includes __image__. 
+Otherwise, to update or manually re-install it: -<a name="image.toDisplayTensor"/> -### [res] image.toDisplayTensor(input, [...]) ### -Optional arguments `[...]` expand to `padding`, `nrow`, `scaleeach`, `min`, `max`, `symmetric`, `saturate`. -Returns a single `res` Tensor that contains a grid of all in the images in `input`. -The latter can either be a table of image Tensors of size `height x width` (greyscale) or -`nChannel x height x width` (color), -or a single Tensor of size `batchSize x nChannel x height x width` or `nChannel x height x width` -where `nChannel=[3,1]`, `batchSize x height x width` or `height x width`. - -When `scaleeach=false` (the default), all detected images -are compressed with successive calls to [image.minmax](#image.minmax): -```lua -image.minmax{tensor=input[i], min=min, max=max, symm=symmetric, saturate=saturate} -``` -`padding` specifies the number of padding pixels between images. The default is 0. -`nrow` specifies the number of images per row. The default is 6. - -Note that arguments can also be specified as key-value arguments (in a table). - -<a name="image.display"/> -### [res] image.display(input, [...]) ### -Optional arguments `[...]` expand to `zoom`, `min`, `max`, `legend`, `win`, -`x`, `y`, `scaleeach`, `gui`, `offscreen`, `padding`, `symm`, `nrow`. -Displays `input` image(s) with optional saturation and zooming. -The `input`, which is either a Tensor of size `HxW`, `KxHxW` or `Kx3xHxW`, or list, -is first prepared for display by passing it through [image.toDisplayTensor](#image.toDisplayTensor): -```lua -input = image.toDisplayTensor{ - input=input, padding=padding, nrow=nrow, saturate=saturate, - scaleeach=scaleeach, min=min, max=max, symmetric=symm -} +```bash +$ luarocks install image ``` -The resulting `input` will be displayed using [qtlua](https://github.com/torch/qtlua). -The displayed image will be zoomed by a factor of `zoom`. The default is 1. 
-If `gui=true` (the default), the graphical user inteface (GUI) -is an interactive window that provides the user with the ability to zoom in or out. -This can be turned off for a faster display. `legend` is a legend to be displayed, -which has a default value of `image.display`. `win` is an optional qt window descriptor. -If `x` and `y` are given, they are used to offset the image. Both default to 0. -When `offscreen=true`, rendering (to generate images) is performed offscreen. - -<a name="image.window"/> -### [window, painter] image.window([...]) ### -Creates a window context for images. -Optional arguments `[...]` expand to `hook_resize`, `hook_mousepress`, `hook_mousedoublepress`. -These have a default value of `nil`, but may correspond to commensurate qt objects. - -<a name="image.colorspace"/> -## Color Space Conversions ## -This section includes functions for performing conversions between -different color spaces. - -<a name="image.rgb2lab"/> -### [res] image.rgb2lab([dst,] src) ### -Converts a `src` RGB image to [Lab](https://en.wikipedia.org/wiki/Lab_color_space). -If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. -<a name="image.rgb2yuv"/> -### [res] image.rgb2yuv([dst,] src) ### -Converts a RGB image to YUV. If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. - -<a name="image.yuv2rgb"/> -### [res] image.yuv2rgb([dst,] src) ### -Converts a YUV image to RGB. If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. - -<a name="image.rgb2y"/> -### [res] image.rgb2y([dst,] src) ### -Converts a RGB image to Y (discard U and V). -If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. - -<a name="image.rgb2hsl"/> -### [res] image.rgb2hsl([dst,] src) ### -Converts a RGB image to [HSL](https://en.wikipedia.org/wiki/HSL_and_HSV). 
-If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. - -<a name="image.hsl2rgb"/> -### [res] image.hsl2rgb([dst,] src) ### -Converts a HSL image to RGB. -If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. - -<a name="image.rgb2hsv"/> -### [res] image.rgb2hsv([dst,] src) ### -Converts a RGB image to [HSV](https://en.wikipedia.org/wiki/HSL_and_HSV). -If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. - -<a name="image.hsv2rgb"/> -### [res] image.hsv2rgb([dst,] src) ### -Converts a HSV image to RGB. -If `dst` is provided, it is used to store the output -image. Otherwise, returns a new `res` Tensor. - -<a name="image.rgb2nrgb"/> -### [res] image.rgb2nrgb([dst,] src) ### -Converts an RGB image to normalized-RGB. - -<a name="image.y2jet"/> -### [res] image.y2jet([dst,] src) ### -Converts a L-levels (1 to L) greyscale image into a L-levels jet heat-map. -If `dst` is provided, it is used to store the output image. Otherwise, returns a new `res` Tensor. - -This is particulary helpful for understanding the magnitude of the values of a matrix, or easily spot peaks in scalar field (like probability densities over a 2D area). -For example, you can run it as +## Usage ```lua -image.display{image=image.y2jet(torch.linspace(1,10,10)), zoom=50} -``` - -<a name="image.tensorconst"/> -## Tensor Constructors ## -The following functions construct Tensors like Gaussian or -Laplacian kernels, or images like Lenna and Fabio. - -<a name="image.lena"/> -### [res] image.lena() ### -Returns the classic `Lenna.jpg` image as a `3 x 512 x 512` Tensor. - -<a name="image.fabio"/> -### [res] image.fabio() ### -Returns the `fabio.jpg` image as a `257 x 271` Tensor. 
- -<a name="image.gaussian"/> -### [res] image.gaussian([size, sigma, amplitude, normalize, [...]]) ### -Returns a 2D [Gaussian](https://en.wikipedia.org/wiki/Gaussian_function) -kernel of size `height x width`. When used as a Gaussian smoothing operator in a 2D -convolution, this kernel is used to `blur` images and remove detail and noise -(ref.: [Gaussian Smoothing](http://homepages.inf.ed.ac.uk/rbf/HIPR2/gsmooth.htm)). -Optional arguments `[...]` expand to -`width`, `height`, `sigma_horz`, `sigma_vert`, `mean_horz`, `mean_vert` and `tensor`. - -The default value of `height` and `width` is `size`, where the latter -has a default value of 3. The amplitude of the Gaussian (its maximum value) -is `amplitude`. The default is 1. -When `normalize=true`, the kernel is normalized to have a sum of 1. -This overrides the `amplitude` argument. The default is `false`. -The default value of the horizontal and vertical standard deviation -`sigma_horz` and `sigma_vert` of the Gaussian kernel is `sigma`, where -the latter has a default value of 0.25. The default values for the -corresponding means `mean_horz` and `mean_vert` are 0.5. Both the -standard deviations and means are relative to kernels of unit width and height -where the top-left corner is the origin. In other works, a mean of 0.5 is -the center of the kernel size, while a standard deviation of 0.25 is a quarter -of it. When `tensor` is provided (a 2D Tensor), the `height`, `width` and `size` are ignored. -It is used to store the returned gaussian kernel. - -Note that arguments can also be specified as key-value arguments (in a table). - -<a name="image.gaussian1D"/> -### [res] image.gaussian1D([size, sigma, amplitude, normalize, mean, tensor]) ### -Returns a 1D Gaussian kernel of size `size`, mean `mean` and standard -deviation `sigma`. -Respectively, these arguments have default values of 3, 0.25 and 0.5. -The amplitude of the Gaussian (its maximum value) -is `amplitude`. The default is 1. 
-When `normalize=true`, the kernel is normalized to have a sum of 1. -This overrides the `amplitude` argument. The default is `false`. Both the -standard deviation and mean are relative to a kernel of unit size. -In other works, a mean of 0.5 is the center of the kernel size, -while a standard deviation of 0.25 is a quarter of it. -When `tensor` is provided (a 1D Tensor), the `size` is ignored. -It is used to store the returned gaussian kernel. - -Note that arguments can also be specified as key-value arguments (in a table). - -<a name="image.laplacian"/> -### [res] image.laplacian([size, sigma, amplitude, normalize, [...]]) ### -Returns a 2D [Laplacian](https://en.wikipedia.org/wiki/Blob_detection#The_Laplacian_of_Gaussian) -kernel of size `height x width`. -When used in a 2D convolution, the Laplacian of an image highlights -regions of rapid intensity change and is therefore often used for edge detection -(ref.: [Laplacian/Laplacian of Gaussian](http://homepages.inf.ed.ac.uk/rbf/HIPR2/log.htm)). -Optional arguments `[...]` expand to -`width`, `height`, `sigma_horz`, `sigma_vert`, `mean_horz`, `mean_vert`. - -The default value of `height` and `width` is `size`, where the latter -has a default value of 3. The amplitude of the Laplacian (its maximum value) -is `amplitude`. The default is 1. -When `normalize=true`, the kernel is normalized to have a sum of 1. -This overrides the `amplitude` argument. The default is `false`. -The default value of the horizontal and vertical standard deviation -`sigma_horz` and `sigma_vert` of the Laplacian kernel is `sigma`, where -the latter has a default value of 0.25. The default values for the -corresponding means `mean_horz` and `mean_vert` are 0.5. Both the -standard deviations and means are relative to kernels of unit width and height -where the top-left corner is the origin. In other works, a mean of 0.5 is -the center of the kernel size, while a standard deviation of 0.25 is a quarter -of it. 
- -<a name="image.colormap"/> -### [res] image.colormap(nColor) ### -Creates an optimally-spaced RGB color mapping of `nColor` colors. -Note that the mapping is obtained by generating the colors around -the HSV wheel, varying the Hue component. -The returned `res` Tensor has size `nColor x 3`. - -<a name="image.jetColormap"/> -### [res] image.jetColormap(nColor) ### -Creates a jet (blue to red) RGB color mapping of `nColor` colors. -The returned `res` Tensor has size `nColor x 3`. - -## Dependencies: -[Torch7](www.torch.ch) - -## Install: -``` -$ luarocks install image -``` - -## Use: -``` > require 'image' > l = image.lena() > image.display(l) diff --git a/doc/colorspace.md b/doc/colorspace.md new file mode 100644 index 0000000..d270718 --- /dev/null +++ b/doc/colorspace.md @@ -0,0 +1,67 @@ +<a name="image.colorspace"></a> +## Color Space Conversions ## +This section includes functions for performing conversions between +different color spaces. + +<a name="image.rgb2lab"></a> +### [res] image.rgb2lab([dst,] src) ### +Converts a `src` RGB image to [Lab](https://en.wikipedia.org/wiki/Lab_color_space). +If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +<a name="image.rgb2yuv"></a> +### [res] image.rgb2yuv([dst,] src) ### +Converts a RGB image to YUV. If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +<a name="image.yuv2rgb"></a> +### [res] image.yuv2rgb([dst,] src) ### +Converts a YUV image to RGB. If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +<a name="image.rgb2y"></a> +### [res] image.rgb2y([dst,] src) ### +Converts a RGB image to Y (discard U and V). +If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +<a name="image.rgb2hsl"></a> +### [res] image.rgb2hsl([dst,] src) ### +Converts a RGB image to [HSL](https://en.wikipedia.org/wiki/HSL_and_HSV). 
+If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +<a name="image.hsl2rgb"></a> +### [res] image.hsl2rgb([dst,] src) ### +Converts a HSL image to RGB. +If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +<a name="image.rgb2hsv"></a> +### [res] image.rgb2hsv([dst,] src) ### +Converts a RGB image to [HSV](https://en.wikipedia.org/wiki/HSL_and_HSV). +If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +<a name="image.hsv2rgb"></a> +### [res] image.hsv2rgb([dst,] src) ### +Converts a HSV image to RGB. +If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +<a name="image.rgb2nrgb"></a> +### [res] image.rgb2nrgb([dst,] src) ### +Converts an RGB image to normalized-RGB. + +<a name="image.y2jet"></a> +### [res] image.y2jet([dst,] src) ### +Converts a L-levels (1 to L) greyscale image into a L-levels jet heat-map. +If `dst` is provided, it is used to store the output image. Otherwise, returns a new `res` Tensor. + +This is particularly helpful for understanding the magnitude of the values of a matrix, or for easily spotting peaks in a scalar field (like probability densities over a 2D area). +For example, you can run it as + +```lua +image.display{image=image.y2jet(torch.linspace(1,10,10)), zoom=50} +``` + diff --git a/doc/gui.md b/doc/gui.md new file mode 100644 index 0000000..3213af3 --- /dev/null +++ b/doc/gui.md @@ -0,0 +1,53 @@ +<a name="image.grapicalinter"></a> +## Graphical User Interfaces ## +The following functions, except for [image.toDisplayTensor](#image.toDisplayTensor), +require package [qtlua](https://github.com/torch/qtlua) and can only be +accessed via the `qlua` Lua interpreter (as opposed to the +[th](https://github.com/torch/trepl) or luajit interpreter). 
+ +<a name="image.toDisplayTensor"></a> +### [res] image.toDisplayTensor(input, [...]) ### +Optional arguments `[...]` expand to `padding`, `nrow`, `scaleeach`, `min`, `max`, `symmetric`, `saturate`. +Returns a single `res` Tensor that contains a grid of all in the images in `input`. +The latter can either be a table of image Tensors of size `height x width` (greyscale) or +`nChannel x height x width` (color), +or a single Tensor of size `batchSize x nChannel x height x width` or `nChannel x height x width` +where `nChannel=[3,1]`, `batchSize x height x width` or `height x width`. + +When `scaleeach=false` (the default), all detected images +are compressed with successive calls to [image.minmax](simpletransform.md#image.minmax): +```lua +image.minmax{tensor=input[i], min=min, max=max, symm=symmetric, saturate=saturate} +``` +`padding` specifies the number of padding pixels between images. The default is 0. +`nrow` specifies the number of images per row. The default is 6. + +Note that arguments can also be specified as key-value arguments (in a table). + +<a name="image.display"></a> +### [res] image.display(input, [...]) ### +Optional arguments `[...]` expand to `zoom`, `min`, `max`, `legend`, `win`, +`x`, `y`, `scaleeach`, `gui`, `offscreen`, `padding`, `symm`, `nrow`. +Displays `input` image(s) with optional saturation and zooming. +The `input`, which is either a Tensor of size `HxW`, `KxHxW` or `Kx3xHxW`, or list, +is first prepared for display by passing it through [image.toDisplayTensor](#image.toDisplayTensor): +```lua +input = image.toDisplayTensor{ + input=input, padding=padding, nrow=nrow, saturate=saturate, + scaleeach=scaleeach, min=min, max=max, symmetric=symm +} +``` +The resulting `input` will be displayed using [qtlua](https://github.com/torch/qtlua). +The displayed image will be zoomed by a factor of `zoom`. The default is 1. 
+If `gui=true` (the default), the graphical user interface (GUI) +is an interactive window that provides the user with the ability to zoom in or out. +This can be turned off for a faster display. `legend` is a legend to be displayed, +which has a default value of `image.display`. `win` is an optional qt window descriptor. +If `x` and `y` are given, they are used to offset the image. Both default to 0. +When `offscreen=true`, rendering (to generate images) is performed offscreen. + +<a name="image.window"></a> +### [window, painter] image.window([...]) ### +Creates a window context for images. +Optional arguments `[...]` expand to `hook_resize`, `hook_mousepress`, `hook_mousedoublepress`. +These have a default value of `nil`, but may correspond to commensurate qt objects. diff --git a/doc/index.md b/doc/index.md new file mode 100644 index 0000000..bf2a3a3 --- /dev/null +++ b/doc/index.md @@ -0,0 +1,34 @@ +# image Package Reference Manual # + +__image__ is the [Torch7 distribution](http://torch.ch/) package for processing +images. It contains a wide variety of functions divided into the following categories: + + * [Saving and loading](saveload.md) images as JPEG, PNG, PPM and PGM; + * [Simple transformations](simpletransform.md) like translation, scaling and rotation; + * [Parameterized transformations](paramtransform.md) like convolutions and warping; + * [Graphical user interfaces](gui.md) like display and window; + * [Color Space Conversions](colorspace.md) from and to RGB, YUV, Lab, and HSL; + * [Tensor Constructors](tensorconstruct.md) for creating Lenna, Fabio and Gaussian and Laplacian kernels; + +Note that unless specified otherwise, this package deals with images of size +`nChannel x height x width`. + +## Install + +The easiest way to install this package is by following the [instructions](http://torch.ch/docs/getting-started.html) +to install [Torch7](http://www.torch.ch), which includes __image__.
+Otherwise, to update or manually re-install it: + +```bash +$ luarocks install image +``` + +## Usage + +```lua +> require 'image' +> l = image.lena() +> image.display(l) +> f = image.fabio() +> image.display(f) +``` diff --git a/doc/paramtransform.md b/doc/paramtransform.md new file mode 100644 index 0000000..839c754 --- /dev/null +++ b/doc/paramtransform.md @@ -0,0 +1,73 @@ +<a name="image.paramtrans"></a> +## Parameterized transformations ## +This section includes functions for performing transformations on +images requiring parameter Tensors like a warp `field` or a convolution +`kernel`. + +<a name="image.warp"></a> +### [res] image.warp([dst,]src,field,[mode,offset,clamp_mode,pad_val]) ### +Warps image `src` (of size`KxHxW`) +according to flow field `field`. The latter has size `2xHxW` where the +first dimension is for the `(y,x)` flow field. String `mode` can +take on values [lanczos](https://en.wikipedia.org/wiki/Lanczos_resampling), +[bicubic](https://en.wikipedia.org/wiki/Bicubic_interpolation), +[bilinear](https://en.wikipedia.org/wiki/Bilinear_interpolation) (the default), +or *simple*. When `offset` is true (the default), `(x,y)` is added to the flow field. +The `clamp_mode` variable specifies how to handle the interpolation of samples off the input image. +Permitted values are strings *clamp* (the default) or *pad*. +When `clamp_mode` equals `pad`, the user can specify the padding value with `pad_val` (default = 0). Note: setting this value when `clamp_mode` equals `clamp` will result in an error. +If `dst` is specified, it is used to store the result of the warp. +Otherwise, returns a new `res` Tensor. + +<a name="image.convolve"></a> +### [res] image.convolve([dst,] src, kernel, [mode]) ### +Convolves Tensor `kernel` over image `src`. 
Valid string values for argument +`mode` are : + * *full* : the `src` image is effectively zero-padded such that the `res` of the convolution has the same size as `src`; + * *valid* (the default) : the `res` image will have `math.ceil(kernel/2)` less columns and rows on each side; + * *same* : performs a *full* convolution, but crops out the portion fitting the output size of *valid*; +Note that this function internally uses +[torch.conv2](https://github.com/torch/torch7/blob/master/doc/maths.md#torch.conv.dok). +If `dst` is provided, it is used to store the output image. +Otherwise, returns a new `res` Tensor. + +<a name="image.lcn"></a> +### [res] image.lcn(src, [kernel]) ### +Local contrast normalization (LCN) on a given `src` image using kernel `kernel`. +If `kernel` is not given, then a default `9x9` Gaussian is used +(see [image.gaussian](tensorconstruct.md#image.gaussian)). + +To prevent border effects, the image is first global contrast normalized +(GCN) by subtracting the global mean and dividing by the global +standard deviation. + +Then the image is locally contrast normalized using the following equation: +```lua +res = (src - lm(src)) / sqrt( lm(src) - lm(src*src) ) +``` +where `lm(x)` is the local mean of each pixel in the image (i.e. +`image.convolve(x,kernel)`) and `sqrt(x)` is the element-wise +square root of `x`. In other words, LCN performs +local subtractive and divisive normalization. + +Note that this implementation is different than the LCN Layer defined on page 3 of +[What is the Best Multi-Stage Architecture for Object Recognition?](http://yann.lecun.com/exdb/publis/pdf/jarrett-iccv-09.pdf). + +<a name="image.erode"></a> +### [res] image.erode(src, [kernel, pad]) ### +Performs a [morphological erosion](https://en.wikipedia.org/wiki/Erosion_(morphology)) +on binary (zeros and ones) image `src` using odd +dimensioned morphological binary kernel `kernel`. +The default is a kernel consisting of ones of size `3x3`.
Number +`pad` is the value to assume outside the image boundary when performing +the convolution. The default is 1. + +<a name="image.dilate"></a> +### [res] image.dilate(src, [kernel, pad]) ### +Performs a [morphological dilation](https://en.wikipedia.org/wiki/Dilation_(morphology)) +on binary (zeros and ones) image `src` using odd +dimensioned morphological binary kernel `kernel`. +The default is a kernel consisting of ones of size `3x3`. Number +`pad` is the value to assume outside the image boundary when performing +the convolution. The default is 0. + diff --git a/doc/saveload.md b/doc/saveload.md new file mode 100644 index 0000000..a8739d9 --- /dev/null +++ b/doc/saveload.md @@ -0,0 +1,52 @@ +<a name="image.saveload"></a> +## Saving and Loading ## +This section includes functions for saving and loading different types +of images to and from disk. + +<a name="image.load"></a> +### [res] image.load(filename, [depth, tensortype]) ### +Loads an image located at path `filename` having `depth` channels (1 or 3) +into a [Tensor](https://github.com/torch/torch7/blob/master/doc/tensor.md#tensor) +of type `tensortype` (*float*, *double* or *byte*). The last two arguments +are optional. + +The image format is determined from the `filename`'s +extension suffix. Supported formats are +[JPEG](https://en.wikipedia.org/wiki/JPEG), +[PNG](https://en.wikipedia.org/wiki/Portable_Network_Graphics), +[PPM and PGM](https://en.wikipedia.org/wiki/Netpbm_format). + +The returned `res` Tensor has size `nChannel x height x width` where `nChannel` is +1 (greyscale) or 3 (usually [RGB](https://en.wikipedia.org/wiki/RGB_color_model) +or [YUV](https://en.wikipedia.org/wiki/YUV)). + +<a name="image.save"></a> +### image.save(filename, tensor) ### +Saves Tensor `tensor` to disk at path `filename`. The format to which +the image is saved is extrapolated from the `filename`'s extension suffix. +The `tensor` should be of size `nChannel x height x width`.
+ +<a name="image.decompressJPG"></a> +### [res] image.decompressJPG(tensor, [depth, tensortype]) ### +Decompresses an image from a ByteTensor in memory having `depth` channels (1 or 3) +into a [Tensor](https://github.com/torch/torch7/blob/master/doc/tensor.md#tensor) +of type `tensortype` (*float*, *double* or *byte*). The last two arguments +are optional. + +Usage: +```lua +local fin = torch.DiskFile(imfile, 'r') +fin:binary() +fin:seekEnd() +local file_size_bytes = fin:position() - 1 +fin:seek(1) +local img_binary = torch.ByteTensor(file_size_bytes) +fin:readByte(img_binary:storage()) +fin:close() +-- Then when you're ready to decompress the ByteTensor: +im = image.decompressJPG(img_binary) +``` + +<a name="image.compressJPG"></a> +### [res] image.compressJPG(tensor, [quality]) ### +Compresses an image to a ByteTensor in memory. Optional quality is between 1 and 100 and adjusts compression quality. diff --git a/doc/simpletransform.md b/doc/simpletransform.md new file mode 100644 index 0000000..2f5be1f --- /dev/null +++ b/doc/simpletransform.md @@ -0,0 +1,111 @@ +<a name="image.simpletrans"></a> +## Simple Transformations ## +This section includes simple but very common image transformations +like cropping, translation, scaling and rotation. + +<a name="image.crop"></a> +### [res] image.crop([dst,] src, x1, y1, [x2, y2]) ### +Crops image `src` at coordinate `(x1, y1)` up to coordinate +`(x2, y2)`. If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +### [res] image.crop([dst,] src, format, width, height) ### +Crops a `width x height` section of source image `src`. The argument +`format` is a string specifying where to crop: it can be "c", "tl", "tr", +"bl" or "br" for center, top left, top right, bottom left and bottom right, +respectively. If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. 
+ +<a name="image.translate"></a> +### [res] image.translate([dst,] src, x, y) ### +Translates image `src` by `x` pixels horizontally and `y` pixels +vertically. If `dst` is provided, it is used to store the output +image. Otherwise, returns a new `res` Tensor. + +<a name="image.scale"></a> +### [res] image.scale(src, width, height, [mode]) ### +Rescale the height and width of image `src` to have +width `width` and height `height`. Variable `mode` specifies +type of interpolation to be used. Valid values include +[bilinear](https://en.wikipedia.org/wiki/Bilinear_interpolation) +(the default), [bicubic](https://en.wikipedia.org/wiki/Bicubic_interpolation), +or *simple* interpolation. Returns a new `res` Tensor. + +### [res] image.scale(src, size, [mode]) ### +Rescale the height and width of image `src`. Variable `size` is a number +or a string specifying the size of the result image. When `size` is a +number, it specifies the maximum height or width of the output. When it is +a string like `WxH` or `MAX` or `^MIN`, `*SC` or `*SCn/SCd` it specifies +the `height x width`, maximum height or width of the output, minimum height +or width of the output, scaling factor (number), or fractional scaling +factor (int/int), respectively. + +### [res] image.scale(dst, src, [mode]) ### +Rescale the height and width of image `src` to fit the dimensions of +Tensor `dst`. + +<a name="image.rotate"></a> +### [res] image.rotate([dst,], src, theta, [mode]) ### +Rotates image `src` by `theta` radians. +If `dst` is specified it is used to store the results of the rotation. +Variable `mode` specifies type of interpolation to be used. Valid values include +*simple* (the default) or *bilinear* interpolation. + +<a name="image.polar"></a> +### [res] image.polar([dst,], src, [interpolation], [mode]) ### +Converts image `src` to polar coordinates. In the polar image, angular information is in the vertical direction and radius information in the horizontal direction. 
+If `dst` is specified it is used to store the polar image. If `dst` is not specified, its size is automatically determined. Variable `interpolation` specifies type of interpolation to be used. Valid values include *simple* (the default) or *bilinear* interpolation. Variable `mode` determines whether the *full* image is converted to the polar space (implying empty regions in the polar image), or whether only the *valid* central part of the polar transform is returned (the default). + +<a name="image.logpolar"></a> +### [res] image.logpolar([dst,], src, [interpolation], [mode]) ### +Converts image `src` to log-polar coordinates. In the log-polar image, angular information is in the vertical direction and log-radius information in the horizontal direction. +If `dst` is specified it is used to store the polar image. If `dst` is not specified, its size is automatically determined. Variable `interpolation` specifies type of interpolation to be used. Valid values include *simple* (the default) or *bilinear* interpolation. Variable `mode` determines whether the *full* image is converted to the log-polar space (implying empty regions in the log-polar image), or whether only the *valid* central part of the log-polar transform is returned (the default). + +<a name="image.hflip"></a> +### [res] image.hflip([dst,] src) ### +Flips image `src` horizontally (left<->right). If `dst` is provided, it is used to +store the output image. Otherwise, returns a new `res` Tensor. + +<a name="image.vflip"></a> +### [res] image.vflip([dst,], src) ### +Flips image `src` vertically (upside<->down). If `dst` is provided, it is used to +store the output image. Otherwise, returns a new `res` Tensor. + +<a name="image.flip"></a> +### [res] image.flip([dst,] src, flip_dim) ### +Flips image `src` along the specified dimension. If `dst` is provided, it is used to +store the output image. Otherwise, returns a new `res` Tensor.
+ +<a name="image.minmax"></a> +### [res] image.minmax{tensor, [min, max, ...]} ### +Compresses image `tensor` between `min` and `max`. +When omitted, `min` and `max` are inferred from +`tensor:min()` and `tensor:max()`, respectively. +The `tensor` is normalized using `min` and `max` by performing : +```lua +tensor:add(-min):div(max-min) +``` +Other optional arguments (`...`) include `symm`, `inplace`, `saturate`, and `tensorOut`. +When `symm=true` and `min` and `max` are both omitted, +`max = min*2` in the above equation. This results in a symmetric dynamic +range that is particularly useful for drawing filters. The default is `false`. +When `inplace=true`, the result of the compression is stored in `tensor`. +The default is `false`. +When `saturate=true`, the result of the compression is passed through +a function that clips the values between 0 and 1 +(i.e. anything below 0 is set to 0, anything above 1 is set to 1). +When provided, Tensor `tensorOut` is used to store results. +Note that arguments should be provided as key-value pairs (in a table). + +<a name="image.gaussianpyramid"></a> +### [res] image.gaussianpyramid([dst,] src, scales) ### +Constructs a [Gaussian pyramid](https://en.wikipedia.org/wiki/Gaussian_pyramid) +of scales `scales` from a 2D or 3D `src` image of size +`[nChannel x] width x height`. Each Tensor at index `i` +in the returned list of Tensors has size `[nChannel x] width*scales[i] x height*scales[i]`. + +If list `dst` is provided, with or without Tensors, it is used to store the output images. +Otherwise, returns a new `res` list of Tensors. + +Internally, this function makes use of functions [image.gaussian](tensorconstruct.md#image.gaussian), +[image.scale](#image.scale) and [image.convolve](paramtransform.md#image.convolve).
diff --git a/doc/tensorconstruct.md b/doc/tensorconstruct.md new file mode 100644 index 0000000..18b909e --- /dev/null +++ b/doc/tensorconstruct.md @@ -0,0 +1,91 @@ +<a name="image.tensorconst"></a> +## Tensor Constructors ## +The following functions construct Tensors like Gaussian or +Laplacian kernels, or images like Lenna and Fabio. + +<a name="image.lena"></a> +### [res] image.lena() ### +Returns the classic `Lenna.jpg` image as a `3 x 512 x 512` Tensor. + +<a name="image.fabio"></a> +### [res] image.fabio() ### +Returns the `fabio.jpg` image as a `257 x 271` Tensor. + +<a name="image.gaussian"></a> +### [res] image.gaussian([size, sigma, amplitude, normalize, [...]]) ### +Returns a 2D [Gaussian](https://en.wikipedia.org/wiki/Gaussian_function) +kernel of size `height x width`. When used as a Gaussian smoothing operator in a 2D +convolution, this kernel is used to `blur` images and remove detail and noise +(ref.: [Gaussian Smoothing](http://homepages.inf.ed.ac.uk/rbf/HIPR2/gsmooth.htm)). +Optional arguments `[...]` expand to +`width`, `height`, `sigma_horz`, `sigma_vert`, `mean_horz`, `mean_vert` and `tensor`. + +The default value of `height` and `width` is `size`, where the latter +has a default value of 3. The amplitude of the Gaussian (its maximum value) +is `amplitude`. The default is 1. +When `normalize=true`, the kernel is normalized to have a sum of 1. +This overrides the `amplitude` argument. The default is `false`. +The default value of the horizontal and vertical standard deviation +`sigma_horz` and `sigma_vert` of the Gaussian kernel is `sigma`, where +the latter has a default value of 0.25. The default values for the +corresponding means `mean_horz` and `mean_vert` are 0.5. Both the +standard deviations and means are relative to kernels of unit width and height +where the top-left corner is the origin. In other words, a mean of 0.5 is +the center of the kernel size, while a standard deviation of 0.25 is a quarter +of it.
When `tensor` is provided (a 2D Tensor), the `height`, `width` and `size` are ignored. +It is used to store the returned gaussian kernel. + +Note that arguments can also be specified as key-value arguments (in a table). + +<a name="image.gaussian1D"></a> +### [res] image.gaussian1D([size, sigma, amplitude, normalize, mean, tensor]) ### +Returns a 1D Gaussian kernel of size `size`, mean `mean` and standard +deviation `sigma`. +Respectively, these arguments have default values of 3, 0.5 and 0.25. +The amplitude of the Gaussian (its maximum value) +is `amplitude`. The default is 1. +When `normalize=true`, the kernel is normalized to have a sum of 1. +This overrides the `amplitude` argument. The default is `false`. Both the +standard deviation and mean are relative to a kernel of unit size. +In other words, a mean of 0.5 is the center of the kernel size, +while a standard deviation of 0.25 is a quarter of it. +When `tensor` is provided (a 1D Tensor), the `size` is ignored. +It is used to store the returned gaussian kernel. + +Note that arguments can also be specified as key-value arguments (in a table). + +<a name="image.laplacian"></a> +### [res] image.laplacian([size, sigma, amplitude, normalize, [...]]) ### +Returns a 2D [Laplacian](https://en.wikipedia.org/wiki/Blob_detection#The_Laplacian_of_Gaussian) +kernel of size `height x width`. +When used in a 2D convolution, the Laplacian of an image highlights +regions of rapid intensity change and is therefore often used for edge detection +(ref.: [Laplacian/Laplacian of Gaussian](http://homepages.inf.ed.ac.uk/rbf/HIPR2/log.htm)). +Optional arguments `[...]` expand to +`width`, `height`, `sigma_horz`, `sigma_vert`, `mean_horz`, `mean_vert`. + +The default value of `height` and `width` is `size`, where the latter +has a default value of 3. The amplitude of the Laplacian (its maximum value) +is `amplitude`. The default is 1. +When `normalize=true`, the kernel is normalized to have a sum of 1.
+This overrides the `amplitude` argument. The default is `false`. +The default value of the horizontal and vertical standard deviation +`sigma_horz` and `sigma_vert` of the Laplacian kernel is `sigma`, where +the latter has a default value of 0.25. The default values for the +corresponding means `mean_horz` and `mean_vert` are 0.5. Both the +standard deviations and means are relative to kernels of unit width and height +where the top-left corner is the origin. In other words, a mean of 0.5 is +the center of the kernel size, while a standard deviation of 0.25 is a quarter +of it. + +<a name="image.colormap"></a> +### [res] image.colormap(nColor) ### +Creates an optimally-spaced RGB color mapping of `nColor` colors. +Note that the mapping is obtained by generating the colors around +the HSV wheel, varying the Hue component. +The returned `res` Tensor has size `nColor x 3`. + +<a name="image.jetColormap"></a> +### [res] image.jetColormap(nColor) ### +Creates a jet (blue to red) RGB color mapping of `nColor` colors. +The returned `res` Tensor has size `nColor x 3`.
diff --git a/generic/image.c b/generic/image.c index 27469ab..89ba31f 100755 --- a/generic/image.c +++ b/generic/image.c @@ -47,14 +47,14 @@ static long image_(Main_op_depth)( THTensor *T){ return 1; /* greyscale */ } -static void image_(Main_scale_rowcol)(THTensor *Tsrc, - THTensor *Tdst, - long src_start, - long dst_start, - long src_stride, - long dst_stride, - long src_len, - long dst_len ) { +static void image_(Main_scaleLinear_rowcol)(THTensor *Tsrc, + THTensor *Tdst, + long src_start, + long dst_start, + long src_stride, + long dst_stride, + long src_len, + long dst_len ) { real *src= THTensor_(data)(Tsrc); real *dst= THTensor_(data)(Tdst); @@ -65,12 +65,19 @@ static void image_(Main_scale_rowcol)(THTensor *Tsrc, long si_i; float scale = (float)(src_len - 1) / (dst_len - 1); - for( di = 0; di < dst_len - 1; di++ ) { - long dst_pos = dst_start + di*dst_stride; - si_f = di * scale; si_i = (long)si_f; si_f -= si_i; + if ( src_len == 1 ) { + for( di = 0; di < dst_len - 1; di++ ) { + long dst_pos = dst_start + di*dst_stride; + dst[dst_pos] = src[ src_start ]; + } + } else { + for( di = 0; di < dst_len - 1; di++ ) { + long dst_pos = dst_start + di*dst_stride; + si_f = di * scale; si_i = (long)si_f; si_f -= si_i; - dst[dst_pos] = (1 - si_f) * src[ src_start + si_i * src_stride ] + - si_f * src[ src_start + (si_i + 1) * src_stride ]; + dst[dst_pos] = (1 - si_f) * src[ src_start + si_i * src_stride ] + + si_f * src[ src_start + (si_i + 1) * src_stride ]; + } } dst[ dst_start + (dst_len - 1) * dst_stride ] = @@ -110,6 +117,65 @@ static void image_(Main_scale_rowcol)(THTensor *Tsrc, } } +static void image_(Main_scaleCubic_rowcol)(THTensor *Tsrc, + THTensor *Tdst, + long src_start, + long dst_start, + long src_stride, + long dst_stride, + long src_len, + long dst_len ) { + + real *src= THTensor_(data)(Tsrc); + real *dst= THTensor_(data)(Tdst); + + if ( dst_len == src_len ){ + long i; + for( i = 0; i < dst_len; i++ ) + dst[ dst_start + i*dst_stride ] = src[ src_start + 
i*src_stride ]; + } else { + long di; + float si_f; + long si_i; + float scale; + if (dst_len == 1) + scale = (float)(src_len - 1); + else + scale = (float)(src_len - 1) / (dst_len - 1); + + for( di = 0; di < dst_len - 1; di++ ) { + long dst_pos = dst_start + di*dst_stride; + si_f = di * scale; si_i = (long)si_f; si_f -= si_i; + + real p0; + real p1 = src[ src_start + si_i * src_stride ]; + real p2 = src[ src_start + (si_i + 1) * src_stride ]; + real p3; + if (si_i > 0) { + p0 = src[ src_start + (si_i - 1) * src_stride ]; + } else { + p0 = 2*p1 - p2; + } + if (si_i + 2 < src_len) { + p3 = src[ src_start + (si_i + 2) * src_stride ]; + } else { + p3 = 2*p2 - p1; + } + + real a0 = p1; + real a1 = -(real)1/(real)2*p0 + (real)1/(real)2*p2; + real a2 = p0 - (real)5/(real)2*p1 + (real)2*p2 - (real)1/(real)2*p3; + real a3 = -(real)1/(real)2*p0 + (real)3/(real)2*p1 - (real)3/(real)2*p2 + + (real)1/(real)2*p3; + + dst[dst_pos] = a0 + si_f * (a1 + si_f * (a2 + a3 * si_f)); + } + + dst[ dst_start + (dst_len - 1) * dst_stride ] = + src[ src_start + (src_len - 1) * src_stride ]; + } +} + static int image_(Main_scaleBilinear)(lua_State *L) { THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor); @@ -147,27 +213,90 @@ static int image_(Main_scaleBilinear)(lua_State *L) { for(k=0;k<image_(Main_op_depth)(Tsrc);k++) { /* compress/expand rows first */ for(j = 0; j < src_height; j++) { - image_(Main_scale_rowcol)(Tsrc, - Ttmp, - 0*src_stride2+j*src_stride1+k*src_stride0, - 0*tmp_stride2+j*tmp_stride1+k*tmp_stride0, - src_stride2, - tmp_stride2, - src_width, - tmp_width ); + image_(Main_scaleLinear_rowcol)(Tsrc, + Ttmp, + 0*src_stride2+j*src_stride1+k*src_stride0, + 0*tmp_stride2+j*tmp_stride1+k*tmp_stride0, + src_stride2, + tmp_stride2, + src_width, + tmp_width ); } /* then columns */ for(i = 0; i < dst_width; i++) { - image_(Main_scale_rowcol)(Ttmp, - Tdst, - i*tmp_stride2+0*tmp_stride1+k*tmp_stride0, - i*dst_stride2+0*dst_stride1+k*dst_stride0, - tmp_stride1, - dst_stride1, - 
tmp_height, - dst_height ); + image_(Main_scaleLinear_rowcol)(Ttmp, + Tdst, + i*tmp_stride2+0*tmp_stride1+k*tmp_stride0, + i*dst_stride2+0*dst_stride1+k*dst_stride0, + tmp_stride1, + dst_stride1, + tmp_height, + dst_height ); + } + } + THTensor_(free)(Ttmp); + return 0; +} + +static int image_(Main_scaleBicubic)(lua_State *L) { + + THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor); + THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor); + THTensor *Ttmp; + long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height; + long src_stride0, src_stride1, src_stride2, src_width, src_height; + long tmp_stride0, tmp_stride1, tmp_stride2, tmp_width, tmp_height; + long i, j, k; + + image_(Main_op_validate)(L, Tsrc,Tdst); + + int ndims; + if (Tdst->nDimension == 3) ndims = 3; + else ndims = 2; + + Ttmp = THTensor_(newWithSize2d)(Tsrc->size[ndims-2], Tdst->size[ndims-1]); + + dst_stride0= image_(Main_op_stride)(Tdst,0); + dst_stride1= image_(Main_op_stride)(Tdst,1); + dst_stride2= image_(Main_op_stride)(Tdst,2); + src_stride0= image_(Main_op_stride)(Tsrc,0); + src_stride1= image_(Main_op_stride)(Tsrc,1); + src_stride2= image_(Main_op_stride)(Tsrc,2); + tmp_stride0= image_(Main_op_stride)(Ttmp,0); + tmp_stride1= image_(Main_op_stride)(Ttmp,1); + tmp_stride2= image_(Main_op_stride)(Ttmp,2); + dst_width= Tdst->size[ndims-1]; + dst_height= Tdst->size[ndims-2]; + src_width= Tsrc->size[ndims-1]; + src_height= Tsrc->size[ndims-2]; + tmp_width= Ttmp->size[1]; + tmp_height= Ttmp->size[0]; + + for(k=0;k<image_(Main_op_depth)(Tsrc);k++) { + /* compress/expand rows first */ + for(j = 0; j < src_height; j++) { + image_(Main_scaleCubic_rowcol)(Tsrc, + Ttmp, + 0*src_stride2+j*src_stride1+k*src_stride0, + 0*tmp_stride2+j*tmp_stride1+k*tmp_stride0, + src_stride2, + tmp_stride2, + src_width, + tmp_width ); + } + + /* then columns */ + for(i = 0; i < dst_width; i++) { + image_(Main_scaleCubic_rowcol)(Ttmp, + Tdst, + i*tmp_stride2+0*tmp_stride1+k*tmp_stride0, + 
i*dst_stride2+0*dst_stride1+k*dst_stride0, + tmp_stride1, + dst_stride1, + tmp_height, + dst_height ); } } THTensor_(free)(Ttmp); @@ -272,6 +401,10 @@ static int image_(Main_rotate)(lua_State *L) src= THTensor_(data)(Tsrc); dst= THTensor_(data)(Tdst); + if (dst == src) { + luaL_error(L, "image.rotate: in-place rotate not supported"); + } + dst_stride0 = 0; dst_stride1 = Tdst->stride[Tdst->nDimension-2]; dst_stride2 = Tdst->stride[Tdst->nDimension-1]; @@ -300,11 +433,11 @@ static int image_(Main_rotate)(lua_State *L) if( (Tsrc->nDimension!=Tdst->nDimension) ) luaL_error(L, "image.rotate: src and dst depths do not match"); - xc=src_width/2.0; - yc=src_height/2.0; + xc = (src_width-1)/2.0; + yc = (src_height-1)/2.0; - sin_theta = sinf(theta); - cos_theta = cosf(theta); + sin_theta = sin(theta); + cos_theta = cos(theta); for(j = 0; j < dst_height; j++) { jd=j; @@ -312,9 +445,8 @@ static int image_(Main_rotate)(lua_State *L) float val = -1; id= i; - ii=(long)( cos_theta*(id-xc)-sin_theta*(jd-yc) ); - jj=(long)( cos_theta*(jd-yc)+sin_theta*(id-xc) ); - ii+=(long) xc; jj+=(long) yc; + ii = (long) round(cos_theta*(id-xc) - sin_theta*(jd-yc) + xc); + jj = (long) round(cos_theta*(jd-yc) + sin_theta*(id-xc) + yc); /* rotated corners are blank */ if(ii>src_width-1) val=0; @@ -361,6 +493,10 @@ static int image_(Main_rotateBilinear)(lua_State *L) src= THTensor_(data)(Tsrc); dst= THTensor_(data)(Tdst); + if (dst == src) { + luaL_error(L, "image.rotate: in-place rotate not supported"); + } + dst_stride0 = 0; dst_stride1 = Tdst->stride[Tdst->nDimension-2]; dst_stride2 = Tdst->stride[Tdst->nDimension-1]; @@ -389,8 +525,8 @@ static int image_(Main_rotateBilinear)(lua_State *L) if( (Tsrc->nDimension!=Tdst->nDimension) ) luaL_error(L, "image.rotate: src and dst depths do not match"); - xc=src_width/2.0; - yc=src_height/2.0; + xc = (src_width-1)/2.0; + yc = (src_height-1)/2.0; for(j = 0; j < dst_height; j++) { jd=j; @@ -401,19 +537,22 @@ static int image_(Main_rotateBilinear)(lua_State 
*L) ri = cos(theta)*(id-xc)-sin(theta)*(jd-yc); rj = cos(theta)*(jd-yc)+sin(theta)*(id-xc); - ii_0=(long)floor(ri); - ii_1=ii_0 + 1; - jj_0=(long)floor(rj); - jj_1=jj_0 + 1; - wi = ri - ii_0; - wj = rj - jj_0; - ii_0+=(long) xc; ii_1+=(long) xc; jj_0+=(long) yc;jj_1+=(long) yc; - - /* rotated corners are blank */ - if(ii_1>src_width-1) val=0; - if(jj_1>src_height-1) val=0; - if(ii_0<0) val=0; - if(jj_0<0) val=0; + ii_0 = (long)floor(ri+xc); + ii_1 = ii_0 + 1; + jj_0 = (long)floor(rj+yc); + jj_1 = jj_0 + 1; + wi = ri+xc-ii_0; + wj = rj+yc-jj_0; + + /* default to the closest value when interpolating on image boundaries (either image pixel or 0) */ + if(ii_1==src_width && wi<0.5) ii_1 = ii_0; + else if(ii_1>=src_width) val=0; + if(jj_1==src_height && wj<0.5) jj_1 = jj_0; + else if(jj_1>=src_height) val=0; + if(ii_0==-1 && wi>0.5) ii_0 = ii_1; + else if(ii_0<0) val=0; + if(jj_0==-1 && wj>0.5) jj_0 = jj_1; + else if(jj_0<0) val=0; if(Tsrc->nDimension==2) { if(val==-1) @@ -450,13 +589,13 @@ static int image_(Main_polar)(lua_State *L) long i, j, k; float id, jd, a, r, m, midY, midX; long ii,jj; - + luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "polar: src not 2 or 3 dimensional"); luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "polar: dst not 2 or 3 dimensional"); - + src= THTensor_(data)(Tsrc); dst= THTensor_(data)(Tdst); - + dst_stride0 = 0; dst_stride1 = Tdst->stride[Tdst->nDimension-2]; dst_stride2 = Tdst->stride[Tdst->nDimension-1]; @@ -467,7 +606,7 @@ static int image_(Main_polar)(lua_State *L) dst_stride0 = Tdst->stride[0]; dst_depth = Tdst->size[0]; } - + src_stride0 = 0; src_stride1 = Tsrc->stride[Tsrc->nDimension-2]; src_stride2 = Tsrc->stride[Tsrc->nDimension-1]; @@ -478,13 +617,13 @@ static int image_(Main_polar)(lua_State *L) src_stride0 = Tsrc->stride[0]; src_depth = Tsrc->size[0]; } - + if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) ) { luaL_error(L, "image.polar: src and dst depths do not 
match"); } - + if( (Tsrc->nDimension!=Tdst->nDimension) ) { luaL_error(L, "image.polar: src and dst depths do not match"); } - + // compute maximum distance midY = (float) src_height / 2.0; midX = (float) src_width / 2.0; @@ -494,7 +633,7 @@ static int image_(Main_polar)(lua_State *L) else { m = (src_width < src_height) ? midX : midY; } - + // loop to fill polar image for(j = 0; j < dst_height; j++) { // orientation loop jd = (float) j; @@ -503,15 +642,15 @@ static int image_(Main_polar)(lua_State *L) float val = -1; id = (float) i; r = (m * id) / (float) dst_width; // current distance - + jj = (long) floor( r * cos(a) + midY); // y-location in source image ii = (long) floor(-r * sin(a) + midX); // x-location in source image - + if(ii>src_width-1) val=0; if(jj>src_height-1) val=0; if(ii<0) val=0; if(jj<0) val=0; - + if(Tsrc->nDimension==2) { if(val==-1) @@ -543,13 +682,13 @@ static int image_(Main_polarBilinear)(lua_State *L) long i, j, k; float id, jd, a, r, m, midY, midX; long ii_0, ii_1, jj_0, jj_1; - + luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "polar: src not 2 or 3 dimensional"); luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "polar: dst not 2 or 3 dimensional"); - + src= THTensor_(data)(Tsrc); dst= THTensor_(data)(Tdst); - + dst_stride0 = 0; dst_stride1 = Tdst->stride[Tdst->nDimension-2]; dst_stride2 = Tdst->stride[Tdst->nDimension-1]; @@ -560,7 +699,7 @@ static int image_(Main_polarBilinear)(lua_State *L) dst_stride0 = Tdst->stride[0]; dst_depth = Tdst->size[0]; } - + src_stride0 = 0; src_stride1 = Tsrc->stride[Tsrc->nDimension-2]; src_stride2 = Tsrc->stride[Tsrc->nDimension-1]; @@ -571,13 +710,13 @@ static int image_(Main_polarBilinear)(lua_State *L) src_stride0 = Tsrc->stride[0]; src_depth = Tsrc->size[0]; } - + if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) ) { luaL_error(L, "image.polar: src and dst depths do not match"); } - + if( (Tsrc->nDimension!=Tdst->nDimension) ) { luaL_error(L, 
"image.polar: src and dst depths do not match"); } - + // compute maximum distance midY = (float) src_height / 2.0; midX = (float) src_width / 2.0; @@ -587,7 +726,7 @@ static int image_(Main_polarBilinear)(lua_State *L) else { m = (src_width < src_height) ? midX : midY; } - + // loop to fill polar image for(j = 0; j < dst_height; j++) { // orientation loop jd = (float) j; @@ -597,24 +736,24 @@ static int image_(Main_polarBilinear)(lua_State *L) real ri, rj, wi, wj; id = (float) i; r = (m * id) / (float) dst_width; // current distance - + rj = r * cos(a) + midY; // y-location in source image ri = -r * sin(a) + midX; // x-location in source image - + ii_0=(long)floor(ri); ii_1=ii_0 + 1; jj_0=(long)floor(rj); jj_1=jj_0 + 1; wi = ri - ii_0; wj = rj - jj_0; - + // switch to nearest interpolation when bilinear is impossible if(ii_1>src_width-1 || jj_1>src_height-1 || ii_0<0 || jj_0<0) { if(ii_0>src_width-1) val=0; if(jj_0>src_height-1) val=0; if(ii_0<0) val=0; if(jj_0<0) val=0; - + if(Tsrc->nDimension==2) { if(val==-1) @@ -632,7 +771,7 @@ static int image_(Main_polarBilinear)(lua_State *L) } } } - + // bilinear interpolation else { if(Tsrc->nDimension==2) { @@ -671,13 +810,13 @@ static int image_(Main_logPolar)(lua_State *L) long i, j, k; float id, jd, a, r, m, midY, midX, fw; long ii,jj; - + luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "polar: src not 2 or 3 dimensional"); luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "polar: dst not 2 or 3 dimensional"); - + src= THTensor_(data)(Tsrc); dst= THTensor_(data)(Tdst); - + dst_stride0 = 0; dst_stride1 = Tdst->stride[Tdst->nDimension-2]; dst_stride2 = Tdst->stride[Tdst->nDimension-1]; @@ -688,7 +827,7 @@ static int image_(Main_logPolar)(lua_State *L) dst_stride0 = Tdst->stride[0]; dst_depth = Tdst->size[0]; } - + src_stride0 = 0; src_stride1 = Tsrc->stride[Tsrc->nDimension-2]; src_stride2 = Tsrc->stride[Tsrc->nDimension-1]; @@ -699,13 +838,13 @@ static int image_(Main_logPolar)(lua_State 
*L) src_stride0 = Tsrc->stride[0]; src_depth = Tsrc->size[0]; } - + if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) ) { luaL_error(L, "image.polar: src and dst depths do not match"); } - + if( (Tsrc->nDimension!=Tdst->nDimension) ) { luaL_error(L, "image.polar: src and dst depths do not match"); } - + // compute maximum distance midY = (float) src_height / 2.0; midX = (float) src_width / 2.0; @@ -715,7 +854,7 @@ static int image_(Main_logPolar)(lua_State *L) else { m = (src_width < src_height) ? midX : midY; } - + // loop to fill polar image fw = log(m) / (float) dst_width; for(j = 0; j < dst_height; j++) { // orientation loop @@ -724,17 +863,17 @@ static int image_(Main_logPolar)(lua_State *L) for(i = 0; i < dst_width; i++) { // radius loop float val = -1; id = (float) i; - + r = exp(id * fw); - + jj = (long) floor( r * cos(a) + midY); // y-location in source image ii = (long) floor(-r * sin(a) + midX); // x-location in source image - + if(ii>src_width-1) val=0; if(jj>src_height-1) val=0; if(ii<0) val=0; if(jj<0) val=0; - + if(Tsrc->nDimension==2) { if(val==-1) @@ -766,13 +905,13 @@ static int image_(Main_logPolarBilinear)(lua_State *L) long i, j, k; float id, jd, a, r, m, midY, midX, fw; long ii_0, ii_1, jj_0, jj_1; - + luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "polar: src not 2 or 3 dimensional"); luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "polar: dst not 2 or 3 dimensional"); - + src= THTensor_(data)(Tsrc); dst= THTensor_(data)(Tdst); - + dst_stride0 = 0; dst_stride1 = Tdst->stride[Tdst->nDimension-2]; dst_stride2 = Tdst->stride[Tdst->nDimension-1]; @@ -783,7 +922,7 @@ static int image_(Main_logPolarBilinear)(lua_State *L) dst_stride0 = Tdst->stride[0]; dst_depth = Tdst->size[0]; } - + src_stride0 = 0; src_stride1 = Tsrc->stride[Tsrc->nDimension-2]; src_stride2 = Tsrc->stride[Tsrc->nDimension-1]; @@ -794,13 +933,13 @@ static int image_(Main_logPolarBilinear)(lua_State *L) src_stride0 = 
Tsrc->stride[0]; src_depth = Tsrc->size[0]; } - + if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) ) { luaL_error(L, "image.polar: src and dst depths do not match"); } - + if( (Tsrc->nDimension!=Tdst->nDimension) ) { luaL_error(L, "image.polar: src and dst depths do not match"); } - + // compute maximum distance midY = (float) src_height / 2.0; midX = (float) src_width / 2.0; @@ -810,7 +949,7 @@ static int image_(Main_logPolarBilinear)(lua_State *L) else { m = (src_width < src_height) ? midX : midY; } - + // loop to fill polar image fw = log(m) / (float) dst_width; for(j = 0; j < dst_height; j++) { // orientation loop @@ -820,26 +959,26 @@ static int image_(Main_logPolarBilinear)(lua_State *L) float val = -1; real ri, rj, wi, wj; id = (float) i; - + r = exp(id * fw); - + rj = r * cos(a) + midY; // y-location in source image ri = -r * sin(a) + midX; // x-location in source image - + ii_0=(long)floor(ri); ii_1=ii_0 + 1; jj_0=(long)floor(rj); jj_1=jj_0 + 1; wi = ri - ii_0; wj = rj - jj_0; - + // switch to nearest interpolation when bilinear is impossible if(ii_1>src_width-1 || jj_1>src_height-1 || ii_0<0 || jj_0<0) { if(ii_0>src_width-1) val=0; if(jj_0>src_height-1) val=0; if(ii_0<0) val=0; if(jj_0<0) val=0; - + if(Tsrc->nDimension==2) { if(val==-1) @@ -857,7 +996,7 @@ static int image_(Main_logPolarBilinear)(lua_State *L) } } } - + // bilinear interpolation else { if(Tsrc->nDimension==2) { @@ -1339,8 +1478,6 @@ int image_(Main_vflip)(lua_State *L) { int width = dst->size[2]; int height = dst->size[1]; - int src_width = src->size[2]; - int src_height = src->size[1]; int channels = dst->size[0]; long *is = src->stride; long *os = dst->stride; @@ -1388,8 +1525,6 @@ int image_(Main_hflip)(lua_State *L) { int width = dst->size[2]; int height = dst->size[1]; - int src_width = src->size[2]; - int src_height = src->size[1]; int channels = dst->size[0]; long *is = src->stride; long *os = dst->stride; @@ -1434,12 +1569,12 @@ int 
image_(Main_flip)(lua_State *L) { THTensor *dst = luaT_checkudata(L, 1, torch_Tensor); THTensor *src = luaT_checkudata(L, 2, torch_Tensor); long flip_dim = luaL_checklong(L, 3); - - if (dst->nDimension != src->nDimension) { - luaL_error(L, "image.flip: src and dst nDimension does not match"); + + if ((dst->nDimension != 5) || (src->nDimension != 5)) { + luaL_error(L, "image.flip: expected 5 dimensions for src and dst"); } - - if (flip_dim < 1 || flip_dim > dst->nDimension) { + + if (flip_dim < 1 || flip_dim > dst->nDimension || flip_dim > 5) { luaL_error(L, "image.flip: flip_dim out of bounds"); } flip_dim--; // Make it zero indexed @@ -1447,27 +1582,26 @@ int image_(Main_flip)(lua_State *L) { // get raw pointers real *dst_data = THTensor_(data)(dst); real *src_data = THTensor_(data)(src); - if (dst_data == src_data) { + if (dst_data == src_data) { luaL_error(L, "image.flip: in-place flip not supported"); } - + long size0 = dst->size[0]; long size1 = dst->size[1]; long size2 = dst->size[2]; long size3 = dst->size[3]; long size4 = dst->size[4]; - long size_flip = dst->size[flip_dim]; - - if (src->size[0] != size0 || src->size[1] != size1 || - src->size[2] != size2 || src->size[3] != size3 || + + if (src->size[0] != size0 || src->size[1] != size1 || + src->size[2] != size2 || src->size[3] != size3 || src->size[4] != size4) { luaL_error(L, "image.flip: src and dst are not the same size"); } - + long *is = src->stride; long *os = dst->stride; - long x, y, z, d, t, isrc, idst; + long x, y, z, d, t, isrc, idst = 0; for (t = 0; t < size0; t++) { for (d = 0; d < size1; d++) { for (z = 0; z < size2; z++) { @@ -1492,7 +1626,7 @@ int image_(Main_flip)(lua_State *L) { case 4: idst = t*os[0] + d*os[1] + z*os[2] + y*os[3] + (size4 - x - 1)*os[4]; break; - } + } dst_data[ idst ] = src_data[ isrc ]; } } @@ -1503,6 +1637,48 @@ int image_(Main_flip)(lua_State *L) { return 0; } +static inline real image_(Main_cubicInterpolate)(real p0, real p1, real p2, real p3, real x) +{ + return 
p1 + 0.5 * x * (p2 - p0 + x * (2 * p0 - 5 * p1 + 4 * p2 - p3 + x * (3 * (p1 - p2) + p3 - p0))); +} + +static inline void image_(Main_bicubicInterpolate)( + real* src, long* is, long* size, real ix, real iy, + real* dst, long *os, + real pad_value, int bounds_check) +{ + int i, j, k; + real arr[4], p[4]; + + // Calculate fractional and integer components + long x_pix = floor(ix); + long y_pix = floor(iy); + real dx = ix - (real)x_pix; + real dy = iy - (real)y_pix; + + for (k=0; k<size[0]; k++) { + #pragma unroll + for (i = 0; i < 4; i++) { + long v = y_pix + i - 1; + real* data = &src[k * is[0] + v * is[1]]; + + #pragma unroll + for (j = 0; j < 4; j++) { + long u = x_pix + j - 1; + if (bounds_check && (v < 0 || v >= size[1] || u < 0 || u >= size[2])) { + p[j] = pad_value; + } else { + p[j] = data[u * is[2]]; + } + } + + arr[i] = image_(Main_cubicInterpolate)(p[0], p[1], p[2], p[3], dx); + } + + dst[k * os[0]] = image_(Main_cubicInterpolate)(arr[0], arr[1], arr[2], arr[3], dy); + } +} + /* * Warps an image, according to an (x,y) flow field. 
The flow * field is in the space of the destination image, each vector @@ -1515,6 +1691,7 @@ int image_(Main_warp)(lua_State *L) { int mode = lua_tointeger(L, 4); int offset_mode = lua_toboolean(L, 5); int clamp_mode = lua_tointeger(L, 6); + real pad_value = (real)lua_tonumber(L, 7); // dims int width = dst->size[2]; @@ -1532,7 +1709,7 @@ int image_(Main_warp)(lua_State *L) { real *flow_data = THTensor_(data)(flowfield); // resample - long k,x,y,jj,v,u,i,j; + long k,x,y,v,u,i,j; for (y=0; y<height; y++) { for (x=0; x<width; x++) { // subpixel position: @@ -1551,7 +1728,7 @@ int image_(Main_warp)(lua_State *L) { if (off_image == 1 && clamp_mode == 1) { // We're off the image and we're clamping the input image to 0 for (k=0; k<channels; k++) { - dst_data[ k*os[0] + y*os[1] + x*os[2] ] = 0; + dst_data[ k*os[0] + y*os[1] + x*os[2] ] = pad_value; } } else { ix = MAX(ix,0); ix = MIN(ix,src_width-1); @@ -1586,7 +1763,7 @@ int image_(Main_warp)(lua_State *L) { + src_data[ k*is[0] + MIN(iy_se,src_height-1)*is[1] + MIN(ix_se,src_width-1)*is[2] ] * se; } } - break; + break; case 0: // Simple (i.e., nearest neighbor) { // 1 nearest neighbor: @@ -1601,65 +1778,19 @@ int image_(Main_warp)(lua_State *L) { break; case 2: // Bicubic { - // Calculate fractional and integer components - long x_pix = floor(ix); - long y_pix = floor(iy); - real dx = ix - (real)x_pix; - real dy = iy - (real)y_pix; - - real C[4]; - for (k=0; k<channels; k++) { - // Sweep by rows through the samples (to calculate final cubic coefs) - for (jj = 0; jj <= 3; jj++) { - v = y_pix - 1 + jj; - // We need to clamp all uv values to image border: hopefully - // branch prediction and compiler reordering takes care of all - // the conditionals (since the branch probabilities are heavily - // skewed). Alternatively an inline "getPixelSafe" function would - // would be clearer here, but cannot be done with lua? 
- v = MAX(MIN((long)(src_height-1), v), 0); - long ofst = k * is[0] + v * is[1]; - u = x_pix; - u = MAX(MIN((long)(src_width-1), u), 0); - real a0 = src_data[ofst + u * is[2]]; - u = x_pix - 1; - u = MAX(MIN((long)(src_width-1), u), 0); - real d0 = src_data[ofst + u * is[2]] - a0; - u = x_pix + 1; - u = MAX(MIN((long)(src_width-1), u), 0); - real d2 = src_data[ofst + u * is[2]] - a0; - u = x_pix + 2; - u = MAX(MIN((long)(src_width-1), u), 0); - real d3 = src_data[ofst + u * is[2]] - a0; - - // Note: there are mostly static casts, optimizer will take care of - // of it for us (prevents compiler warnings in new gcc) - real a1 = -(real)1/(real)3*d0 + d2 -(real)1/(real)6*d3; - real a2 = (real)1/(real)2*d0 + (real)1/(real)2*d2; - real a3 = -(real)1/(real)6*d0 - (real)1/(real)2*d2 + - (real)1/(real)6*d3; - C[jj] = a0 + dx * (a1 + dx * (a2 + a3 * dx)); - } - - real d0 = C[0]-C[1]; - real d2 = C[2]-C[1]; - real d3 = C[3]-C[1]; - real a0 = C[1]; - real a1 = -(real)1/(real)3*d0 + d2 - (real)1/(real)6*d3; - real a2 = (real)1/(real)2*d0 + (real)1/(real)2*d2; - real a3 = -(real)1/(real)6*d0 - (real)1/(real)2*d2 + - (real)1/(real)6*d3; - real Cc = a0 + dy * (a1 + dy * (a2 + a3 * dy)); - - // I assume that since the image is stored as reals we don't have - // to worry about clamping to min and max int (to prevent over or - // underflow) - dst_data[ k*os[0] + y*os[1] + x*os[2] ] = Cc; - } + // We only need to do bounds checking if ix or iy are near the edge + int edge = !(iy >= 1 && iy < src_height - 2 && ix >= 1 && ix < src_width - 2); + + real* dst = dst_data + y*os[1] + x*os[2]; + if (edge) { + image_(Main_bicubicInterpolate)(src_data, is, src->size, ix, iy, dst, os, pad_value, 1); + } else { + image_(Main_bicubicInterpolate)(src_data, is, src->size, ix, iy, dst, os, pad_value, 0); + } } break; case 3: // Lanczos - { + { // Note: Lanczos can be made fast if the resampling period is // constant... and therefore the Lu, Lv can be cached and reused. 
// However, unfortunately warp makes no assumptions about resampling @@ -1677,7 +1808,7 @@ int image_(Main_warp)(lua_State *L) { long y_pix = floor(iy); // Precalculate the L(x) function evaluations in the u and v direction - const long rad = 3; // This is a tunable parameter: 2 to 3 is OK + #define rad (3) // This is a tunable parameter: 2 to 3 is OK float Lu[2 * rad]; // L(x) for u direction float Lv[2 * rad]; // L(x) for v direction for (u=x_pix-rad+1, i=0; u<=x_pix+rad; u++, i++) { @@ -1826,7 +1957,11 @@ int image_(Main_colorize)(lua_State *L) { for (x = 0; x < width; x++) { int id = THTensor_(get2d)(input, y, x); real check = THTensor_(get2d)(colormap, id, 0); +#ifdef TH_REAL_IS_BYTE + if (check == 255) { +#else if (check == -1) { +#endif for (k = 0; k < channels; k++) { THTensor_(set2d)(colormap, id, k, ((float)rand()/(float)RAND_MAX)); } @@ -1849,9 +1984,9 @@ int image_(Main_rgb2y)(lua_State *L) { luaL_argcheck(L, rgb->nDimension == 3, 1, "image.rgb2y: src not 3D"); luaL_argcheck(L, yim->nDimension == 2, 2, "image.rgb2y: dst not 2D"); luaL_argcheck(L, rgb->size[1] == yim->size[0], 2, - "image.rgb2y: src and dst not of same height"); + "image.rgb2y: src and dst not of same height"); luaL_argcheck(L, rgb->size[2] == yim->size[1], 2, - "image.rgb2y: src and dst not of same width"); + "image.rgb2y: src and dst not of same width"); int y,x; real r,g,b,yc; @@ -1865,8 +2000,8 @@ int image_(Main_rgb2y)(lua_State *L) { b = THTensor_(get3d)(rgb, 2, y, x); yc = (real) ((0.299 * (float) r) - + (0.587 * (float) g) - + (0.114 * (float) b)); + + (0.587 * (float) g) + + (0.114 * (float) b)); THTensor_(set2d)(yim, y, x, yc); } } @@ -1876,6 +2011,7 @@ int image_(Main_rgb2y)(lua_State *L) { static const struct luaL_Reg image_(Main__) [] = { {"scaleSimple", image_(Main_scaleSimple)}, {"scaleBilinear", image_(Main_scaleBilinear)}, + {"scaleBicubic", image_(Main_scaleBicubic)}, {"rotate", image_(Main_rotate)}, {"rotateBilinear", image_(Main_rotateBilinear)}, {"polar", 
image_(Main_polar)}, diff --git a/generic/png.c b/generic/png.c index b98028a..24852de 100755 --- a/generic/png.c +++ b/generic/png.c @@ -10,29 +10,8 @@ * * Clement: modified for Torch7. */ -#include <assert.h> -/* - * Bookkeeping struct for reading png data from memory - */ -typedef struct { - unsigned char* buffer; - png_size_t offset; - png_size_t length; -} libpng_(inmem_buffer); - -/* - * Call back for reading png data from memory - */ -void libpng_(userReadData)(png_structp pngPtrSrc, png_bytep dest, png_size_t length) -{ - libpng_(inmem_buffer)* src = png_get_io_ptr(pngPtrSrc); - assert(src->offset+length <= src->length); - memcpy(dest, src->buffer + src->offset, length); - src->offset += length; -} - -static int libpng_(Main_load)(lua_State *L) +static int libpng_(Main_load)(lua_State *L) { png_byte header[8]; // 8 is the maximum size that can be checked @@ -45,8 +24,9 @@ static int libpng_(Main_load)(lua_State *L) png_bytep * row_pointers; size_t fread_ret; FILE* fp; - libpng_(inmem_buffer) inmem = {0}; /* source memory (if loading from memory) */ - + libpng_inmem_buffer inmem = {0}; /* source memory (if loading from memory) */ + libpng_errmsg errmsg; + const int load_from_file = luaL_checkint(L, 1); if (load_from_file == 1){ @@ -76,18 +56,20 @@ static int libpng_(Main_load)(lua_State *L) if (!png_ptr) luaL_error(L, "[read_png] png_create_read_struct failed"); + png_set_error_fn(png_ptr, &errmsg, libpng_error_fn, NULL); + info_ptr = png_create_info_struct(png_ptr); if (!info_ptr) luaL_error(L, "[read_png] png_create_info_struct failed"); if (setjmp(png_jmpbuf(png_ptr))) - luaL_error(L, "[read_png] Error during init_io"); + luaL_error(L, "[read_png] Error during init_io: %s", errmsg.str); if (load_from_file == 1){ png_init_io(png_ptr, fp); } else { /* set the read callback */ - png_set_read_fn(png_ptr,(png_voidp)&inmem, libpng_(userReadData)); + png_set_read_fn(png_ptr,(png_voidp)&inmem, libpng_userReadData); } png_set_sig_bytes(png_ptr, 8); 
png_read_info(png_ptr, info_ptr); @@ -96,7 +78,7 @@ static int libpng_(Main_load)(lua_State *L) height = png_get_image_height(png_ptr, info_ptr); color_type = png_get_color_type(png_ptr, info_ptr); bit_depth = png_get_bit_depth(png_ptr, info_ptr); - png_read_update_info(png_ptr, info_ptr); + /* get depth */ int depth = 0; @@ -109,7 +91,6 @@ static int libpng_(Main_load)(lua_State *L) if(bit_depth < 8) { png_set_expand_gray_1_2_4_to_8(png_ptr); - png_read_update_info(png_ptr, info_ptr); } depth = 1; } @@ -119,7 +100,6 @@ static int libpng_(Main_load)(lua_State *L) { depth = 3; png_set_expand(png_ptr); - png_read_update_info(png_ptr, info_ptr); } else luaL_error(L, "[read_png_file] Unknown color space"); @@ -127,12 +107,13 @@ static int libpng_(Main_load)(lua_State *L) if(bit_depth < 8) { png_set_strip_16(png_ptr); - png_read_update_info(png_ptr, info_ptr); } + + png_read_update_info(png_ptr, info_ptr); /* read file */ if (setjmp(png_jmpbuf(png_ptr))) - luaL_error(L, "[read_png_file] Error during read_image"); + luaL_error(L, "[read_png_file] Error during read_image: %s", errmsg.str); /* alloc tensor */ THTensor *tensor = THTensor_(newWithSize3d)(depth, height, width); @@ -216,6 +197,7 @@ static int libpng_(Main_save)(lua_State *L) png_structp png_ptr; png_infop info_ptr; png_bytep * row_pointers; + libpng_errmsg errmsg; /* get dims and contiguous tensor */ THTensor *tensorc = THTensor_(newContiguous)(tensor); @@ -250,18 +232,20 @@ static int libpng_(Main_save)(lua_State *L) if (!png_ptr) luaL_error(L, "[write_png_file] png_create_write_struct failed"); + png_set_error_fn(png_ptr, &errmsg, libpng_error_fn, NULL); + info_ptr = png_create_info_struct(png_ptr); if (!info_ptr) luaL_error(L, "[write_png_file] png_create_info_struct failed"); if (setjmp(png_jmpbuf(png_ptr))) - luaL_error(L, "[write_png_file] Error during init_io"); + luaL_error(L, "[write_png_file] Error during init_io: %s", errmsg.str); png_init_io(png_ptr, fp); /* write header */ if 
(setjmp(png_jmpbuf(png_ptr))) - luaL_error(L, "[write_png_file] Error during writing header"); + luaL_error(L, "[write_png_file] Error during writing header: %s", errmsg.str); png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth, color_type, PNG_INTERLACE_NONE, @@ -289,13 +273,13 @@ static int libpng_(Main_save)(lua_State *L) /* write bytes */ if (setjmp(png_jmpbuf(png_ptr))) - luaL_error(L, "[write_png_file] Error during writing bytes"); + luaL_error(L, "[write_png_file] Error during writing bytes: %s", errmsg.str); png_write_image(png_ptr, row_pointers); /* end write */ if (setjmp(png_jmpbuf(png_ptr))) - luaL_error(L, "[write_png_file] Error during end of write"); + luaL_error(L, "[write_png_file] Error during end of write: %s", errmsg.str); /* cleanup png structs */ png_write_end(png_ptr, NULL); @@ -322,6 +306,7 @@ static int libpng_(Main_size)(lua_State *L) png_structp png_ptr; png_infop info_ptr; + libpng_errmsg errmsg; size_t fread_ret; /* open file and test for it being a png */ FILE *fp = fopen(filename, "rb"); @@ -339,14 +324,16 @@ static int libpng_(Main_size)(lua_State *L) if (!png_ptr) luaL_error(L, "[get_png_size] png_create_read_struct failed"); - + + png_set_error_fn(png_ptr, &errmsg, libpng_error_fn, NULL); + info_ptr = png_create_info_struct(png_ptr); if (!info_ptr) luaL_error(L, "[get_png_size] png_create_info_struct failed"); if (setjmp(png_jmpbuf(png_ptr))) - luaL_error(L, "[get_png_size] Error during init_io"); - + luaL_error(L, "[get_png_size] Error during init_io: %s", errmsg.str); + png_init_io(png_ptr, fp); png_set_sig_bytes(png_ptr, 8); @@ -372,10 +359,6 @@ static int libpng_(Main_size)(lua_State *L) else luaL_error(L, "[get_png_size] Unknown color space"); - /* read file */ - if (setjmp(png_jmpbuf(png_ptr))) - luaL_error(L, "[get_png_size] Error during read_image"); - /* done with file */ fclose(fp); diff --git a/image-1.1.alpha-0.rockspec b/image-1.1.alpha-0.rockspec index b7bde31..2c498db 100644 --- a/image-1.1.alpha-0.rockspec +++ 
b/image-1.1.alpha-0.rockspec @@ -25,7 +25,7 @@ dependencies = { build = { type = "command", build_command = [[ -cmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE) +cmake -E make_directory build && cd build && cmake .. -DLUALIB=$(LUALIB) -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE) ]], install_command = "cd build && $(MAKE) install" } @@ -93,12 +93,15 @@ local function todepth(img, depth) end local function isPNG(magicTensor) - pngMagic = torch.ByteTensor({0x89,0x50,0x4e,0x47}) + local pngMagic = torch.ByteTensor({0x89,0x50,0x4e,0x47}) return torch.all(torch.eq(magicTensor, pngMagic)) end local function isJPG(magicTensor) - jpgMagic = torch.ByteTensor({0xff, 0xd8, 0xff, 0xe0}) + -- There are many valid 4th bytes, so only check the first 3 bytes. + -- libjpeg should support most if not all of these: + -- source: http://filesignatures.net/?page=all&order=SIGNATURE&alpha=J + local jpgMagic = torch.ByteTensor({0xff, 0xd8, 0xff}) return torch.all(torch.eq(magicTensor, jpgMagic)) end @@ -107,7 +110,7 @@ local function decompress(tensor, depth, tensortype) dok.error('Input tensor must be a byte tensor', 'image.decompress') end - if isJPG(tensor[{{1,4}}]) then + if isJPG(tensor[{{1,3}}]) then return image.decompressJPG(tensor, depth, tensortype) elseif isPNG(tensor[{{1,4}}]) then return image.decompressPNG(tensor, depth, tensortype) @@ -138,15 +141,22 @@ local function loadPNG(filename, depth, tensortype) end rawset(image, 'loadPNG', loadPNG) +local function clampImage(tensor) + if tensor:type() == 'torch.ByteTensor' then + return tensor + end + local a = torch.Tensor():resize(tensor:size()):copy(tensor) + a.image.saturate(a) -- bound btwn 0 and 1 + a:mul(255) -- remap to [0..255] + return a +end + local function savePNG(filename, tensor) if not xlua.require 'libpng' then dok.error('libpng 
package not found, please install libpng','image.savePNG') end - local MAXVAL = 255 - local a = torch.Tensor():resize(tensor:size()):copy(tensor) - a.image.saturate(a) -- bound btwn 0 and 1 - a:mul(MAXVAL) -- remap to [0..255] - a.libpng.save(filename, a) + tensor = clampImage(tensor) + tensor.libpng.save(filename, tensor) end rawset(image, 'savePNG', savePNG) @@ -160,11 +170,11 @@ local function decompressPNG(tensor, depth, tensortype) 'image.decompressPNG') end local load_from_file = 0 - local a = template(tensortype).libpng.load(load_from_file, tensor) + local a, bit_depth = template(tensortype).libpng.load(load_from_file, tensor) if a == nil then return nil else - return processPNG(a, depth, tensortype) + return processPNG(a, depth, bit_depth, tensortype) end end rawset(image, 'decompressPNG', decompressPNG) @@ -222,13 +232,10 @@ local function saveJPG(filename, tensor) if not xlua.require 'libjpeg' then dok.error('libjpeg package not found, please install libjpeg','image.saveJPG') end - local MAXVAL = 255 - local a = torch.Tensor():resize(tensor:size()):copy(tensor) - a.image.saturate(a) -- bound btwn 0 and 1 - a:mul(MAXVAL) -- remap to [0..255] + tensor = clampImage(tensor) local save_to_file = 1 local quality = 75 - a.libjpeg.save(filename, a, save_to_file, quality) + tensor.libjpeg.save(filename, tensor, save_to_file, quality) end rawset(image, 'saveJPG', saveJPG) @@ -244,14 +251,11 @@ local function compressJPG(tensor, quality) dok.error('libjpeg package not found, please install libjpeg', 'image.compressJPG') end - local MAXVAL = 255 - local a = torch.Tensor():resize(tensor:size()):copy(tensor) - a.image.saturate(a) -- bound btwn 0 and 1 - a:mul(MAXVAL) -- remap to [0..255] + tensor = clampImage(tensor) local b = torch.ByteTensor() local save_to_file = 0 quality = quality or 75 - a.libjpeg.save("", a, save_to_file, quality, b) + tensor.libjpeg.save("", tensor, save_to_file, quality, b) return b end rawset(image, 'compressJPG', compressJPG) @@ -280,11 
+284,8 @@ local function savePPM(filename, tensor) if tensor:nDimension() ~= 3 or tensor:size(1) ~= 3 then dok.error('can only save 3xHxW images as PPM', 'image.savePPM') end - local MAXVAL = 255 - local a = torch.Tensor():resize(tensor:size()):copy(tensor) - a.image.saturate(a) -- bound btwn 0 and 1 - a:mul(MAXVAL) -- remap to [0..255] - a.libppm.save(filename, a) + tensor = clampImage(tensor) + tensor.libppm.save(filename, tensor) end rawset(image, 'savePPM', savePPM) @@ -293,11 +294,8 @@ local function savePGM(filename, tensor) if tensor:nDimension() == 3 and tensor:size(1) ~= 1 then dok.error('can only save 1xHxW or HxW images as PGM', 'image.savePGM') end - local MAXVAL = 255 - local a = torch.Tensor():resize(tensor:size()):copy(tensor) - a.image.saturate(a) -- bound btwn 0 and 1 - a:mul(MAXVAL) -- remap to [0..255] - a.libppm.save(filename, a) + tensor = clampImage(tensor) + tensor.libppm.save(filename, tensor) end rawset(image, 'savePGM', savePGM) @@ -365,6 +363,7 @@ rawset(image, 'save', save) -- local function crop(...) local dst,src,startx,starty,endx,endy + local format,width,height local args = {...} if select('#',...) == 6 then dst = args[1] @@ -374,16 +373,31 @@ local function crop(...) endx = args[5] endy = args[6] elseif select('#',...) == 5 then - src = args[1] - startx = args[2] - starty = args[3] - endx = args[4] - endy = args[5] + if type(args[3]) == 'string' then + dst = args[1] + src = args[2] + format = args[3] + width = args[4] + height = args[5] + else + src = args[1] + startx = args[2] + starty = args[3] + endx = args[4] + endy = args[5] + end elseif select('#',...) == 4 then - dst = args[1] - src = args[2] - startx = args[3] - starty = args[4] + if type(args[2]) == 'string' then + src = args[1] + format = args[2] + width = args[3] + height = args[4] + else + dst = args[1] + src = args[2] + startx = args[3] + starty = args[4] + end elseif select('#',...) == 3 then src = args[1] startx = args[2] @@ -402,9 +416,43 @@ local function crop(...) 
{type='number', help='start x', req=true}, {type='number', help='start y', req=true}, {type='number', help='end x'}, - {type='number', help='end y'})) + {type='number', help='end y'}, + '', + {type='torch.Tensor', help='input image', req=true}, + {type='string', help='format: "c" or "tl" or "tr" or "bl" or "br"', req=true}, + {type='number', help='width', req=true}, + {type='number', help='height', req=true}, + '', + {type='torch.Tensor', help='destination', req=true}, + {type='torch.Tensor', help='input image', req=true}, + {type='string', help='format: "c" or "tl" or "tr" or "bl" or "br"', req=true}, + {type='number', help='width', req=true}, + {type='number', help='height', req=true})) dok.error('incorrect arguments', 'image.crop') end + if format then + local iwidth,iheight + if src:nDimension() == 3 then + iwidth,iheight = src:size(3),src:size(2) + else + iwidth,iheight = src:size(2),src:size(1) + end + local x1, x2 + if format == 'c' then + x1, y1 = math.floor((iwidth-width)/2), math.floor((iheight-height)/2) + elseif format == 'tl' then + x1, y1 = 0, 0 + elseif format == 'tr' then + x1, y1 = iwidth-width, 0 + elseif format == 'bl' then + x1, y1 = 0, iheight-height + elseif format == 'br' then + x1, y1 = iwidth-width, iheight-height + else + error('crop format must be "c"|"tl"|"tr"|"bl"|"br"') + end + return crop(dst, src, x1, y1, x1+width, y1+height) + end if endx==nil then return src.image.cropNoScale(src,dst,startx,starty) else @@ -500,38 +548,63 @@ local function scale(...) 
{type='torch.Tensor', help='input image', req=true}, {type='number', help='destination width', req=true}, {type='number', help='destination height', req=true}, - {type='string', help='mode: bilinear | simple', default='bilinear'}, + {type='string', help='mode: bilinear | bicubic |simple', default='bilinear'}, '', {type='torch.Tensor', help='input image', req=true}, - {type='string | number', help='destination size: "WxH" or "MAX" or "^MIN" or MAX', req=true}, - {type='string', help='mode: bilinear | simple', default='bilinear'}, + {type='string | number', help='destination size: "WxH" or "MAX" or "^MIN" or "*SC" or "*SCd/SCn" or MAX', req=true}, + {type='string', help='mode: bilinear | bicubic | simple', default='bilinear'}, '', {type='torch.Tensor', help='destination image', req=true}, {type='torch.Tensor', help='input image', req=true}, - {type='string', help='mode: bilinear | simple', default='bilinear'})) + {type='string', help='mode: bilinear | bicubic | simple', default='bilinear'})) dok.error('incorrect arguments', 'image.scale') end if size then - local iwidth,iheight + local iwidth, iheight if src:nDimension() == 3 then - iwidth,iheight = src:size(3),src:size(2) + iwidth, iheight = src:size(3),src:size(2) else - iwidth,iheight = src:size(2),src:size(1) + iwidth, iheight = src:size(2),src:size(1) end - local imax = math.max(iwidth,iheight) + + -- MAX? + local imax = math.max(iwidth, iheight) local omax = tonumber(size) if omax then - height = iheight / imax * omax - width = iwidth / imax * omax - else - width,height = size:gfind('(%d*)x(%d*)')() - if not width or not height then - local imin = math.min(iwidth,iheight) - local omin = size:gfind('%^(%d*)')() - if omin then - height = iheight / imin * omin - width = iwidth / imin * omin - end + height = iheight*omax/imax + width = iwidth*omax/imax + end + + -- WxH? + if not width or not height then + width, height = size:match('(%d+)x(%d+)') + end + + -- ^MIN? 
+ if not width or not height then + local imin = math.min(iwidth, iheight) + local omin = tonumber(size:match('%^(%d+)')) + if omin then + height = iheight*omin/imin + width = iwidth*omin/imin + end + end + + -- *SCn/SCd? + if not width or not height then + local scn, scd = size:match('%*(%d+)%/(%d+)') + if scn and scd then + height = iheight*scn/scd + width = iwidth*scn/scd + end + end + + -- *SC? + if not width or not height then + local sc = tonumber(size:match('%*(.+)')) + if sc then + height = iheight*sc + width = iwidth*sc end end end @@ -548,10 +621,12 @@ local function scale(...) mode = mode or 'bilinear' if mode=='bilinear' then src.image.scaleBilinear(src,dst) + elseif mode=='bicubic' then + src.image.scaleBicubic(src,dst) elseif mode=='simple' then src.image.scaleSimple(src,dst) else - dok.error('mode must be one of: simple | bilinear', 'image.scale') + dok.error('mode must be one of: simple | bicubic | bilinear', 'image.scale') end return dst end @@ -764,6 +839,7 @@ local function warp(...) local mode = 'bilinear' local offset_mode = true local clamp_mode = 'clamp' + local pad_value = 0 local args = {...} local nargs = select('#',...) local bad_args = false @@ -778,7 +854,12 @@ local function warp(...) mode = args[3] if nargs >= 4 then offset_mode = args[4] end if nargs >= 5 then clamp_mode = args[5] end - if nargs >= 6 then bad_args = true end + if nargs >= 6 then + assert(clamp_mode == 'pad', 'pad_value can only be specified if' .. + ' clamp_mode = "pad"') + pad_value = args[6] + end + if nargs >= 7 then bad_args = true end else -- With Destination tensor dst = args[1] @@ -787,7 +868,12 @@ local function warp(...) if nargs >= 4 then mode = args[4] end if nargs >= 5 then offset_mode = args[5] end if nargs >= 6 then clamp_mode = args[6] end - if nargs >= 7 then bad_args = true end + if nargs >= 7 then + assert(clamp_mode == 'pad', 'pad_value can only be specified if' .. 
+ ' clamp_mode = "pad"') + pad_value = args[7] + end + if nargs >= 8 then bad_args = true end end end if bad_args then @@ -804,7 +890,8 @@ local function warp(...) {type='torch.Tensor', help='(y,x) flow field (2xHxW)', req=true}, {type='string', help='mode: lanczos | bicubic | bilinear | simple', default='bilinear'}, {type='string', help='offset mode (add (x,y) to flow field)', default=true}, - {type='string', help='clamp mode: how to handle interp of samples off the input image (clamp | pad)', default='clamp'})) + {type='string', help='clamp mode: how to handle interp of samples off the input image (clamp | pad)', default='clamp'}, + {type='number', help='pad value: value to pad image. Can only be set when clamp mode equals "pad"', default=0})) dok.error('incorrect arguments', 'image.warp') end -- This is a little messy, but convert mode string to an enum @@ -835,7 +922,7 @@ local function warp(...) dst = dst or src.new() dst:resize(src:size(1), field:size(2), field:size(3)) - src.image.warp(dst, src, field, mode, offset_mode, clamp_mode) + src.image.warp(dst, src, field, mode, offset_mode, clamp_mode, pad_value) if dim2 then dst = dst[1] end diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..97c8026 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,14 @@ +site_name: image +theme : simplex +repo_url : https://github.com/torch/image +use_directory_urls : false +markdown_extensions: [extra] +docs_dir : doc +pages: +- [index.md, Image] +- [saveload.md, Saving and Loading] +- [simpletransform.md, Simple Transformations] +- [paramtransform.md, Parameterized Transformations] +- [gui.md, Graphical User Interfaces] +- [colorspace.md, Color Space Conversions] +- [tensorconstruct.md, Tensor Constructors] @@ -1,10 +1,10 @@ #include <TH.h> #include <luaT.h> -#include <unistd.h> #include <stdlib.h> #include <stdio.h> #include <string.h> +#include <assert.h> #define PNG_DEBUG 3 #include <png.h> @@ -13,6 +13,47 @@ #define torch_Tensor TH_CONCAT_STRING_3(torch., 
Real, Tensor) #define libpng_(NAME) TH_CONCAT_3(libpng_, Real, NAME) +/* + * Bookkeeping struct for reading png data from memory + */ +typedef struct { + unsigned char* buffer; + png_size_t offset; + png_size_t length; +} libpng_inmem_buffer; + +/* + * Call back for reading png data from memory + */ +static void +libpng_userReadData(png_structp pngPtrSrc, png_bytep dest, png_size_t length) +{ + libpng_inmem_buffer* src = png_get_io_ptr(pngPtrSrc); + assert(src->offset+length <= src->length); + memcpy(dest, src->buffer + src->offset, length); + src->offset += length; +} + +/* + * Error message wrapper (single member struct to preserve `str` size info) + */ +typedef struct { + char str[256]; +} libpng_errmsg; + +/* + * Custom error handling function (see `png_set_error_fn`) + */ +static void +libpng_error_fn(png_structp png_ptr, png_const_charp error_msg) +{ + libpng_errmsg *errmsg = png_get_error_ptr(png_ptr); + int max = sizeof(errmsg->str) - 1; + strncpy(errmsg->str, error_msg, max); + errmsg->str[max] = '\0'; + longjmp(png_jmpbuf(png_ptr), 1); +} + #include "generic/png.c" #include "THGenerateAllTypes.h" diff --git a/test/corrupt-ihdr.png b/test/corrupt-ihdr.png Binary files differnew file mode 100644 index 0000000..ca53ac9 --- /dev/null +++ b/test/corrupt-ihdr.png diff --git a/test/test_decompress_jpg.lua b/test/test_decompress_jpg.lua index a64a443..5728eaf 100755 --- a/test/test_decompress_jpg.lua +++ b/test/test_decompress_jpg.lua @@ -51,9 +51,11 @@ function test.LoadInvalid() local img_binary = torch.rand(file_size_bytes):mul(255):byte() -- Now decompress the image from the ByteTensor - local img_from_tensor = image.decompressJPG(img_binary) + local ok, img_from_tensor = pcall(function() + return image.decompressJPG(img_binary) + end) - mytester:assert(img_from_tensor == nil, + mytester:assert(not ok or img_from_tensor == nil, 'A non-nil was returned on an invalid input! 
') end diff --git a/test/test_png.lua b/test/test_png.lua index c01915d..376ef6f 100644 --- a/test/test_png.lua +++ b/test/test_png.lua @@ -5,7 +5,19 @@ local mytester = torch.Tester() local precision_mean = 1e-3 local test = {} -function checkPNG(imfile, depth, tensortype, want) +local function toBlob(filename) + local f = torch.DiskFile(filename, 'r') + f:binary() + f:seekEnd() + local size = f:position() - 1 + f:seek(1) + local blob = torch.ByteTensor(size) + f:readByte(blob:storage()) + f:close() + return blob +end + +local function checkPNG(imfile, depth, tensortype, want) local img = image.load(imfile, depth, tensortype) -- Tensors have to be converted to double, since assertTensorEq does not support ByteTensor print('img: ', img) @@ -44,6 +56,20 @@ function test.LoadPNG() checkPNG('rgb16-2x1.png', 3, 'float', rgb16float) end +function test.DecompressPNG() + mytester:assertTensorEq( + image.load('rgb2x1.png'), + image.decompressPNG(toBlob('rgb2x1.png')), + precision_mean, + 'decompressed and loaded images should be equal' + ) +end + +function test.LoadCorruptedPNG() + local ok, _ = pcall(image.load, 'corrupt-ihdr.png') + mytester:assert(not ok, 'corrupted image should not be loaded') +end + -- Now run the test above mytester:add(test) mytester:run() diff --git a/test/test_scale.lua b/test/test_scale.lua new file mode 100644 index 0000000..f2225a7 --- /dev/null +++ b/test/test_scale.lua @@ -0,0 +1,51 @@ +require 'image' +require 'torch' + + +torch.setdefaulttensortype('torch.FloatTensor') + +local tester = torch.Tester() +local tests = {} + + +local function outerProduct(x) + x = torch.Tensor(x) + return torch.ger(x, x) +end + + +function tests.bilinearUpscale() + local im = outerProduct{1, 2, 4, 2} + local expected = outerProduct{1, 1.5, 2, 3, 4, 3, 2} + local actual = image.scale(im, expected:size(1), expected:size(2), 'bilinear') + tester:assertTensorEq(actual, expected, 1e-5) +end + + +function tests.bilinearDownscale() + local im = outerProduct{1, 2, 4, 
2} + local expected = outerProduct{1.25, 3, 2.5} + local actual = image.scale(im, expected:size(1), expected:size(2), 'bilinear') + tester:assertTensorEq(actual, expected, 1e-5) +end + + +function tests.bicubicUpscale() + local im = outerProduct{1, 2, 4, 2} + local expected = outerProduct{1, 1.4375, 2, 3.1875, 4, 3.25, 2} + local actual = image.scale(im, expected:size(1), expected:size(2), 'bicubic') + tester:assertTensorEq(actual, expected, 1e-5) +end + + +function tests.bicubicDownscale() + local im = outerProduct{1, 2, 4, 2} + local expected = outerProduct{1, 3.1875, 2} + local actual = image.scale(im, expected:size(1), expected:size(2), 'bicubic') + tester:assertTensorEq(actual, expected, 1e-5) +end + + +tester:add(tests) +tester:run() + diff --git a/test/test_warp.lua b/test/test_warp.lua index 68331c8..68a4ac1 100644 --- a/test/test_warp.lua +++ b/test/test_warp.lua @@ -129,5 +129,11 @@ t1 = sys.clock() print("Rotation Time lanczos = " .. (t1 - t0)) -- Not a robust measure (should average) image.display{image = im_lanczos, zoom = 4, legend = 'rotation lanczos'} +im_lanczos = image.warp(im, flow, 'lanczos', false, 'pad') +image.display{image = im_lanczos, zoom = 4, legend = 'rotation lanczos (default pad)'} + +im_lanczos = image.warp(im, flow, 'lanczos', false, 'pad', 1) +image.display{image = im_lanczos, zoom = 4, legend = 'rotation lanczos (pad 1)'} + image.display{image = im, zoom = 4, legend = 'source image'} |