[API] s[op].bind for block & thread binding for OpenCL/CUDA targets #448

Open · wants to merge 1 commit into main
python/heterocl/devices.py (6 additions & 3 deletions)
@@ -167,7 +167,6 @@ def __str__(self):
         return f"({self.xcel}, {self.memory}"
 
 class CPU(Device):
-    """cpu device with different models"""
     def __init__(self, vendor, model, **kwargs):
         if vendor not in model_table["cpu"]:
             raise DeviceError(vendor + " not supported yet")
@@ -181,7 +180,6 @@ def __repr__(self):
         return f"CPU({self.vendor}, {self.model}, {self.backend}, {self.dev_id})"
 
 class FPGA(Device):
-    """fpga device with different models"""
     def __init__(self, vendor, model, **kwargs):
         if vendor not in model_table["fpga"]:
             raise DeviceError(vendor + " not supported yet")
@@ -193,8 +191,13 @@ def __init__(self, vendor, model, **kwargs):
     def __repr__(self):
         return f"FPGA({self.vendor}, {self.model}, {self.backend}, {self.dev_id})"
 
+class GPU(Device):
+    def __init__(self, vendor, model, **kwargs):
+        super(GPU, self).__init__("GPU", vendor, model, **kwargs)
+    def __repr__(self):
+        return f"GPU({self.vendor}, {self.model}, {self.backend}, {self.dev_id})"
+
 class PIM(Device):
-    """cpu device with different models"""
     def __init__(self, vendor, model, **kwargs):
         if model not in ["ppac"]:
             raise DeviceError(model + " not supported yet")
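The new GPU device class mirrors the existing CPU and FPGA devices. A minimal construction sketch, taken from the test added later in this PR:

    import heterocl as hcl

    # An NVIDIA GPU accelerator paired with an Intel CPU host, as in the PR's test.
    xcel = hcl.devices.GPU("nvidia", "gtx-1080-ti")
    host = hcl.devices.CPU("intel", "e5")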
python/heterocl/mlir/runtime.py (5 additions & 1 deletion)
@@ -27,6 +27,8 @@ def copy_build_files(target, script=None):
     project = target.project
     platform = str(target.tool.name)
     mode = str(target.tool.mode)
+
+    # TODO: implement these steps & commands as methods of Tool objects
     if platform == "vivado_hls":
         os.system("cp " + path + "vivado/* " + project)
         os.system("cp " + path + "harness.mk " + project)
@@ -55,8 +57,10 @@ def copy_build_files(target, script=None):
         with open(os.path.join(project, "run.tcl"), "w") as tcl_file:
             tcl_file.write(new_tcl)
         return "success"
+    elif platform == "nvcc":
+        pass
     else:
-        raise RuntimeError("Not implemented")
+        raise RuntimeError(f"compilation flow for {platform} target is not implemented")
 
 
 def execute_fpga_backend(target):
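The nvcc branch is left as a stub. Purely as a hedged sketch of what it might later grow into, assuming the CUDA flow copies template files the same way the vivado_hls flow does; the cuda/ directory and Makefile.cuda names are hypothetical, not part of this PR:

    import os

    def copy_cuda_build_files(path, project):
        # Hypothetical helper mirroring the vivado_hls branch above;
        # the template directory and Makefile name are assumptions.
        os.system("cp " + path + "cuda/* " + project)         # CUDA kernel/host templates
        os.system("cp " + path + "Makefile.cuda " + project)  # nvcc build harness
        return "success"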
python/heterocl/mlir/schedule.py (21 additions & 0 deletions)
@@ -157,6 +157,15 @@ class Partition(object):
     Block = 1
     Cyclic = 2
 
+class BlockIdx(object):
+    x = 0
+    y = 1
+    z = 2
+
+class ThreadIdx(object):
+    x = 3
+    y = 4
+    z = 5
 
 class Schedule(object):
     """Create a compute schedule
@@ -511,6 +520,18 @@ def tile(self, x_parent, y_parent, x_factor, y_factor):
         # self.op.axis.insert(idx+2, tile_op.results[2])
         # self.op.axis.insert(idx+3, tile_op.results[3])
         return tile_op.results[0], tile_op.results[1], tile_op.results[2], tile_op.results[3]
+
+    def bind(self, var, thread_axis):
+        assert thread_axis <= 5, "cannot support NDRange with dim > 3"
+        if isinstance(var, int):
+            var = self.op.axis[var]
+        if isinstance(var, hcl_d.CreateLoopHandleOp):
+            var = var.result
+        with get_context(), get_location():
+            i32 = IntegerType.get_unsigned(32)
+            thread_binding_type = IntegerAttr.get(i32, thread_axis)
+            hcl_d.ThreadBindOp(self.stage_handle.result, var,
+                               thread_binding_type, ip=GlobalInsertionPoint.get())
 
     def pipeline(self, var, initiation_interval=1):
         """Pipeline the iteration.
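A trimmed usage sketch of the new bind API, mirroring the test added at the end of this PR; it assumes BlockIdx and ThreadIdx are reachable as hcl.BlockIdx and hcl.ThreadIdx, as the test does, and it omits the platform/config steps shown in the full test:

    import heterocl as hcl

    A = hcl.placeholder((256,), "A")
    B = hcl.placeholder((256,), "B")

    def kernel(A, B):
        return hcl.compute((256,), lambda i: A[i] + B[i], "C")

    s = hcl.create_schedule([A, B], kernel)

    # Split the 256-iteration loop into blocks of 64 threads, then bind the
    # outer loop to blockIdx.x and the inner loop to threadIdx.x.
    bx, tx = s[kernel.C].split(kernel.C.axis[0], factor=64)
    s[kernel.C].bind(bx, hcl.BlockIdx.x)
    s[kernel.C].bind(tx, hcl.ThreadIdx.x)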
python/heterocl/tools.py (12 additions & 0 deletions)
@@ -37,6 +37,17 @@ def set_mode(self, mode):
         assert mode in self.suported_modes, f"{mode} not supported {self.suported_modes}"
         self.mode = mode
 
+class NVCC(Tool):
+    def __init__(self):
+        name = "nvcc"
+        mode = "impl"
+        options = {
+            "Version": "10.1.243"
+        }
+        super(NVCC, self).__init__(name, mode, options)
+        self.suported_modes = ["impl"]
+
+
 class VivadoHLS(Tool):
     def __init__(self):
         name = "vivado_hls"
@@ -220,3 +231,4 @@ def __init__(self):
 Tool.vitis = Vitis()
 Tool.aocl = AOCL()
 Tool.sdaccel = SDAccel()
+Tool.nvcc = NVCC()
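Registering Tool.nvcc makes the CUDA toolchain selectable by name when configuring a platform. A minimal selection sketch, following the test in this PR (the Platform arguments mirror that test):

    import heterocl as hcl

    host = hcl.devices.CPU("intel", "e5")
    xcel = hcl.devices.GPU("nvidia", "gtx-1080-ti")
    target = hcl.Platform(name="gpu_platform", devs=[host, xcel],
                          host=host, xcel=xcel, tool=None)

    # compiler="nvcc" routes the build to the newly registered Tool.nvcc,
    # whose only supported mode is "impl".
    target.config(compiler="nvcc", project="gpu.prj")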
tests/mlir/hcl-mlir/test_gpu_backend.py (new file, 40 additions)
@@ -0,0 +1,40 @@
+import heterocl as hcl
+import numpy as np
+import hcl_mlir
+
+
+def test_host_xcel():
+    # Custom GPU platform
+    xcel = hcl.devices.GPU("nvidia", "gtx-1080-ti")
+    host = hcl.devices.CPU("intel", "e5")
+
+    target = hcl.Platform(
+        name = "gpu_platform",
+        devs = [host, xcel],
+        host = host, xcel = xcel,
+        tool = None
+    )
+    target.config(compiler="nvcc", project="gpu.prj")
+
+    # vector-add program
+    hcl_mlir.enable_extract_function()
+    A = hcl.placeholder((256,), "A")
+    B = hcl.placeholder((256,), "B")
+    def kernel(A, B):
+        C = hcl.compute((256,), lambda i: A[i] + B[i], "C")
+        return C
+    s = hcl.create_schedule([A, B], kernel)
+
+    # thread/block binding
+    num_of_threads_per_block = 64
+    bx, tx = s[kernel.C].split(kernel.C.axis[0], factor=num_of_threads_per_block)
+    s[kernel.C].bind(bx, hcl.BlockIdx.x)
+    s[kernel.C].bind(tx, hcl.ThreadIdx.x)
+
+    s.to([A], target.xcel)
+    s.to([kernel.C], target.host)
+    mod = hcl.build(s, target)
+
+
+if __name__ == "__main__":
+    test_host_xcel()
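For reference, the split plus bind in the test encodes the usual CUDA-style index decomposition. The following purely illustrative sketch (not generated by this PR) checks that mapping with NumPy:

    import numpy as np

    N, THREADS_PER_BLOCK = 256, 64
    A, B = np.random.rand(N), np.random.rand(N)
    C = np.empty(N)

    # The outer loop plays the role of blockIdx.x, the inner loop of threadIdx.x;
    # the flat element index is blockIdx.x * THREADS_PER_BLOCK + threadIdx.x.
    for block_x in range(N // THREADS_PER_BLOCK):
        for thread_x in range(THREADS_PER_BLOCK):
            i = block_x * THREADS_PER_BLOCK + thread_x
            C[i] = A[i] + B[i]

    assert np.allclose(C, A + B)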