이전 가우시안 블러 샘플을 Kernel(compute) 쉐이더를 사용해 구현해 본다.
원본이미지를 리사이징해 작은 텍스처를 만들고, 블러링 작업은 이전 샘플과 동일하게 kernel 쉐이더로 5회 적용했다.
속도는 fragment 쉐이더를 사용했을 때와 큰 차이가 없다.
렌더타겟은 사이즈를 줄여 렌더링하는 용도로만 사용한다.
가로, 세로 작업을 위한 MTLTexture, MTLComputePipelineState 와 MTLFunction을 선언했다.
/// Owns the Metal objects used by the compute-kernel Gaussian blur sample:
/// the device and command queue, the source/resized/work textures, the render
/// pipelines, and the horizontal/vertical compute pipelines.
class Renderer:NSObject {
// Metal device; assigned from MTLCreateSystemDefaultDevice() in init().
var device:MTLDevice!
// Command queue created from `device` in initMetal().
var commandQueue: MTLCommandQueue!
// NOTE(review): not referenced anywhere in the visible listing.
var sharedDataPtr: UnsafeMutablePointer<SharedData>?
// Vertex data for the image plane (filled from kImagePlaneVertexData).
var imageVertexBuffer: MTLBuffer!
// Buffer holding the SharedData passed to the blur kernels (tap count and Gaussian weights).
var sharedDataBuffer: MTLBuffer!
// Pipeline used by the final pass that draws the blurred texture to the view.
var imagePipelineState: MTLRenderPipelineState!
// The two work textures: H receives the horizontal pass output, V the vertical pass output.
var workHTargetTexture: MTLTexture?
var workVTargetTexture: MTLTexture?
// Render pass and pipeline used to draw the source image into imageResizeTexture.
var renderPassDescriptor: MTLRenderPassDescriptor!
var renderPipelineState: MTLRenderPipelineState!
// Input image and its downsized copy (the render target of the resize pass).
var imageTexture: MTLTexture?
var imageResizeTexture: MTLTexture?
// Depth-stencil state set on the final on-screen pass.
var imageDepthState:MTLDepthStencilState!
// Rendering shaders (vertex function plus the screen and resize fragment functions).
var imageVertexFunction: MTLFunction!
var renderScreenFragmentFunction: MTLFunction!
var renderTextureFragmentFunction: MTLFunction!
// Compute pipelines and kernel functions for the horizontal (H) and vertical (V) blur passes.
var computeHPipelineState: MTLComputePipelineState!
var computeVPipelineState: MTLComputePipelineState!
var computeHFunction: MTLFunction!
var computeVFunction: MTLFunction!
/// Grabs the system default Metal device and then runs the Metal setup in initMetal().
override init() {
super.init()
self.device = MTLCreateSystemDefaultDevice()
initMetal()
}
// NOTE(review): the "." lines below appear to mark members omitted from this listing.
.
.
.
}
초기화
kernel 함수 두 개를 추가한 부분 외에는 이전 샘플과 동일하다.
/// Performs the one-time Metal setup: looks up the shader functions, creates the
/// command queue and the vertex/shared buffers, runs the individual pipeline
/// initializers, and finally loads the source image texture.
///
/// Prints an error and returns early when the default shader library cannot be loaded.
func initMetal() {
    let ownerBundle = Bundle(for: Renderer.self)
    guard let library = try? device.makeDefaultLibrary(bundle: ownerBundle) else {
        print("[Renderer.initMetal] init error")
        return
    }

    // Render shader entry points.
    imageVertexFunction = library.makeFunction(name: "imageVertexFunction")
    renderScreenFragmentFunction = library.makeFunction(name: "swapFragmentFunction")
    renderTextureFragmentFunction = library.makeFunction(name: "imageResizeFragmentFunction")

    // Compute (kernel) entry points for the horizontal and vertical blur.
    computeHFunction = library.makeFunction(name: "gaussianBlurHFunction")
    computeVFunction = library.makeFunction(name: "gaussianBlurVFunction")

    commandQueue = device.makeCommandQueue()

    // Vertex buffer for the image plane.
    let vertexByteCount = MemoryLayout<Float>.size * kImagePlaneVertexData.count
    imageVertexBuffer = device.makeBuffer(bytes: kImagePlaneVertexData, length: vertexByteCount)
    imageVertexBuffer.label = "ImageVertexBuffer"

    // Shared data buffer: size of SharedData bumped to the next 256-byte boundary.
    let sharedByteCount = (MemoryLayout<SharedData>.size & ~0xFF) + 0x100
    sharedDataBuffer = device.makeBuffer(length: sharedByteCount, options: .storageModeShared)
    sharedDataBuffer.label = "SharedBuffer"

    initRederTarget()
    initSwapRender()
    initKernelTarget()
    initGaussianFilter()

    imageTexture = loadTexture(name: "sample", ext: "png")
}
렌더타겟 설정
기존 샘플과 큰 차이는 없으며, 작업용 텍스처를 별도로 2개 생성했다.
/// Builds everything needed for the off-screen resize pass: the vertex layout,
/// the resize render pipeline, the 256x256 resize target, the two 256x256 work
/// textures used by the blur kernels, and the render pass descriptor that
/// targets the resize texture.
func initRederTarget() {
    // Vertex layout: two float2 attributes (offsets 0 and 8) interleaved in buffer 0, 16-byte stride.
    let vertexLayout = MTLVertexDescriptor()
    for (slot, byteOffset) in [0, 8].enumerated() {
        vertexLayout.attributes[slot].format = .float2
        vertexLayout.attributes[slot].offset = byteOffset
        vertexLayout.attributes[slot].bufferIndex = 0
    }
    vertexLayout.layouts[0].stride = 16
    vertexLayout.layouts[0].stepRate = 1
    vertexLayout.layouts[0].stepFunction = .perVertex

    // Pipeline that draws the source image into the resize texture.
    let resizePipeline = MTLRenderPipelineDescriptor()
    resizePipeline.label = "ImageResizeRenderPipeline"
    resizePipeline.sampleCount = 1
    resizePipeline.vertexFunction = imageVertexFunction
    resizePipeline.fragmentFunction = renderTextureFragmentFunction
    resizePipeline.vertexDescriptor = vertexLayout
    resizePipeline.depthAttachmentPixelFormat = .invalid
    resizePipeline.colorAttachments[0].pixelFormat = .bgra8Unorm
    do {
        renderPipelineState = try device.makeRenderPipelineState(descriptor: resizePipeline)
    } catch {
        print("error=\(error.localizedDescription)")
    }

    // All three textures share the same shape and format; only the usage differs.
    func makeDescriptor(usage: MTLTextureUsage) -> MTLTextureDescriptor {
        let descriptor = MTLTextureDescriptor()
        descriptor.textureType = .type2D
        descriptor.width = 256
        descriptor.height = 256
        descriptor.pixelFormat = .bgra8Unorm
        descriptor.storageMode = .private
        descriptor.usage = usage
        return descriptor
    }

    // Resize target: rendered into, then read by the first blur pass.
    imageResizeTexture = device.makeTexture(descriptor: makeDescriptor(usage: [.renderTarget, .shaderRead]))

    // Work textures: read and written by the compute kernels.
    let workDescriptor = makeDescriptor(usage: [.shaderRead, .shaderWrite])
    workHTargetTexture = device.makeTexture(descriptor: workDescriptor)
    workVTargetTexture = device.makeTexture(descriptor: workDescriptor)

    // Render pass that clears to opaque black and stores into the resize texture.
    let resizePass = MTLRenderPassDescriptor()
    resizePass.colorAttachments[0].texture = imageResizeTexture
    resizePass.colorAttachments[0].loadAction = .clear
    resizePass.colorAttachments[0].clearColor = MTLClearColor(red: 0, green: 0, blue: 0, alpha: 1)
    resizePass.colorAttachments[0].storeAction = .store
    renderPassDescriptor = resizePass
}
커널 파이프라인 설정
/// Creates the compute pipeline states for the horizontal and vertical blur kernels.
/// A failure is printed and leaves the corresponding pipeline state unassigned.
func initKernelTarget() {
    // Builds one compute pipeline; prints the error and yields nil on failure.
    func makePipeline(for kernel: MTLFunction) -> MTLComputePipelineState? {
        do {
            return try device.makeComputePipelineState(function: kernel)
        } catch {
            print("error=\(error.localizedDescription)")
            return nil
        }
    }

    if let horizontal = makePipeline(for: computeHFunction) {
        computeHPipelineState = horizontal
    }
    if let vertical = makePipeline(for: computeVFunction) {
        computeVPipelineState = vertical
    }
}
가우시안 가중치 계산
/// Computes the Gaussian kernel weights and stores them, together with the tap
/// count, in the shared data buffer that is bound to the blur kernels.
///
/// The weights are evaluated for TAP integer offsets centred on zero and are
/// normalized on store so that they sum to 1.
func initGaussianFilter() {
    // Data shared by both blur kernels.
    let SIGMA = 4.0     // sigma^2 (variance)
    let PI2 = 6.28319   // 2 * pi
    let TAP = 7

    let data = sharedDataBuffer.contents().assumingMemoryBound(to: SharedData.self)
    data.pointee.tapCount = Float(TAP)

    // Use the Double overloads of sqrt/exp. The `l`-suffixed C functions operate on
    // `long double`, whose Swift type differs between platforms.
    let amplitude = 1 / sqrt(PI2 * SIGMA)   // loop-invariant factor

    var total: Double = 0
    var result = [Double](repeating: 0.0, count: TAP)
    for i in 0..<TAP {
        let x = Double(i - (TAP - 1) / 2)
        result[i] = amplitude * exp(-(x * x) / (2 * SIGMA))
        print("\(x)=\(result[i])")
        total += result[i]
    }
    print("total=\(total)")

    // The rebind below writes TAP floats into `gaussian`; catch a too-small field in debug builds.
    assert(MemoryLayout.size(ofValue: data.pointee.gaussian) >= TAP * MemoryLayout<Float>.stride,
           "SharedData.gaussian is too small for \(TAP) weights")

    // Store into the buffer, normalizing so that the weights sum to 1.
    withUnsafeMutablePointer(to: &data.pointee.gaussian) { pointer in
        pointer.withMemoryRebound(to: Float.self, capacity: TAP) { buffer in
            for (index, value) in result.enumerated() {
                buffer[index] = Float(value) / Float(total)
            }
        }
    }
    print("\(data.pointee.gaussian)")
}
렌더링
원본이미지를 리사이징해서 텍스처를 생성하고, 5회 블러링 적용 후 화면에 렌더링을 진행한다.
kernel 쉐이더는 16x16 크기의 스레드 그룹으로 구성했다. 첫 번째 패스에서만 리사이징된 텍스처를 입력으로 사용하고, 이후에는 H, V 텍스처를 번갈아 사용하며 블러링을 진행한다.
/// Renders one frame: draws the source image into the resize target, applies the
/// separable Gaussian blur five times with the compute kernels, then draws the
/// blurred texture into the view's drawable.
///
/// The frame is skipped (nothing is encoded) when the view has no drawable or
/// render pass, or when any of the required textures has not been created.
///
/// - Parameter view: The `MTKView` that supplies the drawable and its render pass descriptor.
func render(view:MTKView) {
    print("render")
    let startTime = Int64((Date().timeIntervalSince1970 * 1000.0).rounded())

    guard let renderPass = view.currentRenderPassDescriptor else { return }
    guard let drawable = view.currentDrawable else { return }
    // Bind the textures once instead of force-unwrapping them at each use.
    guard let sourceTexture = imageTexture,
          let resizedTexture = imageResizeTexture,
          let horizontalTexture = workHTargetTexture,
          let verticalTexture = workVTargetTexture else {
        print("[Renderer.render] textures are not ready")
        return
    }
    guard let commandBuffer = commandQueue.makeCommandBuffer() else { return }
    commandBuffer.label = "RenderCommand"

    // Pass 1: render the source image into the small resize texture.
    if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor) {
        encoder.label = "RenderResizeEncoder"
        encoder.setCullMode(.front)
        encoder.setRenderPipelineState(renderPipelineState)
        encoder.setVertexBuffer(imageVertexBuffer, offset: 0, index: 0)
        encoder.setFragmentTexture(sourceTexture, index: 0)
        encoder.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4)
        encoder.endEncoding()
    }

    // Pass 2: blur. 16x16 threads per threadgroup; the number of threadgroups is the
    // texture size divided by that (integer division, so the texture size is
    // expected to be a multiple of 16 — the work textures are 256x256).
    let threadsPerThreadgroup = MTLSizeMake(16, 16, 1)
    let threadgroupsPerGrid = MTLSizeMake(
        horizontalTexture.width / threadsPerThreadgroup.width,
        horizontalTexture.height / threadsPerThreadgroup.height,
        1)

    // Half-open range: exactly five H+V iterations (the closed range 0...5 ran six).
    let blurPassCount = 5
    for pass in 0..<blurPassCount {
        // Only the first iteration reads the resized image; later ones read the previous V output.
        let inputTexture = pass == 0 ? resizedTexture : verticalTexture

        // Horizontal blur: input -> H work texture.
        if let encoder = commandBuffer.makeComputeCommandEncoder() {
            encoder.setComputePipelineState(computeHPipelineState)
            encoder.setTexture(inputTexture, index: 0)
            encoder.setTexture(horizontalTexture, index: 1)
            encoder.setBuffer(sharedDataBuffer, offset: 0, index: 0)
            encoder.dispatchThreadgroups(threadgroupsPerGrid, threadsPerThreadgroup: threadsPerThreadgroup)
            encoder.endEncoding()
        }

        // Vertical blur: H work texture -> V work texture.
        if let encoder = commandBuffer.makeComputeCommandEncoder() {
            encoder.setComputePipelineState(computeVPipelineState)
            encoder.setTexture(horizontalTexture, index: 0)
            encoder.setTexture(verticalTexture, index: 1)
            encoder.setBuffer(sharedDataBuffer, offset: 0, index: 0)
            encoder.dispatchThreadgroups(threadgroupsPerGrid, threadsPerThreadgroup: threadsPerThreadgroup)
            encoder.endEncoding()
        }
    }

    // Pass 3: draw the blurred V texture into the view's drawable.
    if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPass) {
        encoder.label = "SwapEncoder"
        encoder.setCullMode(.front)
        encoder.setRenderPipelineState(imagePipelineState)
        encoder.setDepthStencilState(imageDepthState)
        encoder.setVertexBuffer(imageVertexBuffer, offset: 0, index: 0)
        encoder.setFragmentTexture(verticalTexture, index: 0)
        encoder.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4)
        encoder.endEncoding()
    }

    commandBuffer.present(drawable)
    commandBuffer.commit()
    // Blocks until the GPU finishes so that the elapsed time printed below covers the GPU work.
    commandBuffer.waitUntilCompleted()

    let endTime = Int64((Date().timeIntervalSince1970 * 1000.0).rounded())
    print("complete: \(endTime - startTime)ms)")
}
쉐이더
역시 큰 차이는 없으며, 전달된 그리드의 x, y 좌표에서 인접한 픽셀값을 가져와 가중치 적용 후 더해주면 된다.
// Horizontal Gaussian blur pass.
// Each thread produces one output pixel: it reads tapCount neighbours along x,
// centred on its own position, weights them with sharedData.gaussian and writes
// the sum with alpha fixed to 1.
//   input      - texture to blur (read-only)
//   output     - texture receiving the blurred row data (write-only)
//   sharedData - tap count and per-tap weights
//   gid        - pixel coordinate handled by this thread
kernel void gaussianBlurHFunction(
    texture2d<float, access::read> input [[texture(0)]],
    texture2d<float, access::write> output [[texture(1)]],
    constant SharedData &sharedData [[buffer(0)]],
    uint2 gid [[thread_position_in_grid]]) {
    // Threads outside the output have nothing to write.
    if (gid.x >= output.get_width() || gid.y >= output.get_height()) {
        return;
    }

    const int lastColumn = int(input.get_width()) - 1;
    float3 sum = float3(0.0, 0.0, 0.0);
    for (int i = 0; i < sharedData.tapCount; i++) {
        int index = i - (sharedData.tapCount - 1) / 2;
        // Clamp in signed arithmetic: adding a negative offset to the unsigned gid.x
        // would wrap around at the left edge, and the right edge would run past the texture.
        int column = clamp(int(gid.x) + index, 0, lastColumn);
        sum += input.read(uint2(uint(column), gid.y)).rgb * sharedData.gaussian[i];
    }
    output.write(float4(sum, 1.0), gid);
}
// Vertical Gaussian blur pass.
// Each thread produces one output pixel: it reads tapCount neighbours along y,
// centred on its own position, weights them with sharedData.gaussian and writes
// the sum with alpha fixed to 1.
//   input      - texture to blur (read-only)
//   output     - texture receiving the blurred column data (write-only)
//   sharedData - tap count and per-tap weights
//   gid        - pixel coordinate handled by this thread
kernel void gaussianBlurVFunction(
    texture2d<float, access::read> input [[texture(0)]],
    texture2d<float, access::write> output [[texture(1)]],
    constant SharedData &sharedData [[buffer(0)]],
    uint2 gid [[thread_position_in_grid]]) {
    // Threads outside the output have nothing to write.
    if (gid.x >= output.get_width() || gid.y >= output.get_height()) {
        return;
    }

    const int lastRow = int(input.get_height()) - 1;
    float3 sum = float3(0.0, 0.0, 0.0);
    for (int i = 0; i < sharedData.tapCount; i++) {
        int index = i - (sharedData.tapCount - 1) / 2;
        // Clamp in signed arithmetic: adding a negative offset to the unsigned gid.y
        // would wrap around at the top edge, and the bottom edge would run past the texture.
        int row = clamp(int(gid.y) + index, 0, lastRow);
        sum += input.read(uint2(gid.x, uint(row))).rgb * sharedData.gaussian[i];
    }
    output.write(float4(sum, 1.0), gid);
}
나머지 쉐이더들은 별도로 처리할 것이 없으므로 샘플링한 색상을 그대로 리턴해 주면 된다.
// Per-vertex input of the image plane: position from attribute 0,
// texture coordinate from attribute 1.
struct ImageVertex {
    float3 position [[attribute(0)]];
    float2 texCoord [[attribute(1)]];
};
// Vertex stage output / fragment stage input: clip-space position plus the
// texture coordinate handed on to the fragment functions.
struct ImageOut {
    float4 position [[position]];
    float2 texCoord;
};
// Pass-through vertex function: extends the incoming position to a float4 with
// w = 1 and forwards the texture coordinate unchanged.
vertex ImageOut imageVertexFunction(ImageVertex in [[stage_in]]) {
    ImageOut result;
    result.position = float4(in.position, 1.0);
    result.texCoord = in.texCoord;
    return result;
}
// Fragment function of the resize pass: returns the colour sampled from
// texture 0 at the interpolated texture coordinate, using a default-constructed sampler.
fragment float4 imageResizeFragmentFunction(ImageOut in [[stage_in]],
                                            texture2d<float> texture1 [[texture(0)]]) {
    constexpr sampler defaultSampler;
    return texture1.sample(defaultSampler, in.texCoord);
}
// Fragment function of the final on-screen pass: returns the colour sampled from
// texture 0 at the interpolated texture coordinate, using a default-constructed sampler.
fragment float4 swapFragmentFunction(ImageOut in [[stage_in]],
                                     texture2d<float> texture1 [[texture(0)]]) {
    constexpr sampler defaultSampler;
    return texture1.sample(defaultSampler, in.texCoord);
}
'프로그래밍 > iOS,macOS' 카테고리의 다른 글
오디오유닛 레벨 계산 (0) | 2021.05.01 |
---|---|
Image, PixelBuffer (0) | 2021.04.13 |
아이폰 로컬 화면 공유 : Broadcast Extension (0) | 2021.03.19 |
[Metal] 이미지렌더링~ 카메라 입력과 가우시안 블러~ (1) | 2021.02.13 |
CVPixelBuffer, CMSampleBuffer,Data, Metal Texture, vImage (0) | 2021.02.09 |
[Metal] 이미지 렌더링~ 가우시안 블러 (0) | 2021.02.07 |
[Metal] 이미지 렌더링~ 여러 텍스처 합치기 (0) | 2021.02.06 |
[Metal] 이미지 렌더링~ 텍스처에 렌더링 (0) | 2021.02.05 |
[Metal] 이미지 렌더링~ 텍스처 표시 (0) | 2021.02.03 |
[Metal] 이미지 렌더링~ 사각형 그리기 (0) | 2021.02.03 |