diff --git a/README.md b/README.md index 4e823017..bde6f2cc 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ The fastest and most memory efficient lattice Boltzmann CFD software, running on any GPU via [OpenCL](https://github.com/ProjectPhysX/OpenCL-Wrapper "OpenCL-Wrapper"). -10 billion voxel Space Shuttle simulation700 million voxel raindrop simulation
+10 billion voxel Space Shuttle simulation1 billion voxel raindrop simulation
Hydraulic jump simulationStar Wars X-wing simulation @@ -175,6 +175,7 @@ In consequence, the arithmetic intensity of this implementation is 2.13 (FP32/FP | Nvidia A100 SXM4 40GB | 19.49 | 40 | 1555 | 8522 (84%) | 16013 (79%) | 11251 (56%) | | Nvidia A100 PCIe 40GB | 19.49 | 40 | 1555 | 8526 (84%) | 16035 (79%) | 11088 (55%) | | Nvidia Tesla V100 16GB | 14.13 | 16 | 900 | 5128 (87%) | 10325 (88%) | 7683 (66%) | +| Nvidia Quadro GV100 | 16.66 | 32 | 870 | 3442 (61%) | 6641 (59%) | 5863 (52%) | | Nvidia Tesla P100 16GB | 9.52 | 16 | 732 | 3295 (69%) | 5950 (63%) | 4176 (44%) | | Nvidia Tesla P100 12GB | 9.52 | 12 | 549 | 2427 (68%) | 4141 (58%) | 3999 (56%) | | Nvidia Tesla K40m | 4.29 | 12 | 288 | 1131 (60%) | 1868 (50%) | 912 (24%) | @@ -198,7 +199,7 @@ In consequence, the arithmetic intensity of this implementation is 2.13 (FP32/FP | Nvidia GeForce GTX 960M | 1.51 | 4 | 80 | 442 (84%) | 872 (84%) | 627 (60%) | | Nvidia Quadro K2000 | 0.73 | 2 | 64 | 312 (75%) | 444 (53%) | 171 (21%) | | Nvidia GeForce GT 630 (OEM) | 0.46 | 2 | 29 | 151 (81%) | 185 (50%) | 78 (21%) | -| Apple M1 Pro 16-Core 16GB | 4.10 | 11 | 200 | 1204 (92%) | 2329 (90%) | 1855 (71%) | +| Apple M1 Pro GPU 16C 16GB | 4.10 | 11 | 200 | 1204 (92%) | 2329 (90%) | 1855 (71%) | | AMD Radeon Vega 8 Graphics | 1.23 | 7 | 38 | 157 (63%) | 282 (57%) | 288 (58%) | | Intel UHD Graphics 630 | 0.46 | 7 | 51 | 151 (45%) | 301 (45%) | 187 (28%) | | Intel HD Graphics 5500 | 0.35 | 3 | 26 | 75 (45%) | 192 (58%) | 108 (32%) | diff --git a/src/defines.hpp b/src/defines.hpp index d636651e..86ba393d 100644 --- a/src/defines.hpp +++ b/src/defines.hpp @@ -10,6 +10,11 @@ #define SRT // choose single-relaxation-time LBM collision operator; (default) //#define TRT // choose two-relaxation-time LBM collision operator +//#define FP16S // compress LBM DDFs to range-shifted IEEE-754 FP16; number conversion is done in hardware; all arithmetic is still done in FP32 +//#define FP16C // compress LBM DDFs to more accurate custom FP16C format; number conversion is emulated in software; all arithmetic is still done in FP32 + +#define BENCHMARK // disable all extensions and setups and run benchmark setup instead + //#define VOLUME_FORCE // enables global force per volume in one direction, specified in the LBM class constructor; the force can be changed on-the-fly between time steps at no performance cost //#define FORCE_FIELD // enables a force per volume for each lattice point independently; allocates an extra 12 Bytes/node; enables computing the forces from the fluid on solid boundaries with lbm.calculate_force_on_boundaries(); //#define MOVING_BOUNDARIES // enables moving solids: set solid nodes to TYPE_S and set their velocity u unequal to zero @@ -18,17 +23,12 @@ //#define TEMPERATURE // enables temperature extension; set fixed-temperature nodes with TYPE_T (similar to EQUILIBRIUM_BOUNDARIES); allocates an extra 32 (FP32) or 18 (FP16) Bytes/node //#define SUBGRID // enables Smagorinsky-Lilly subgrid turbulence model to keep simulations with very large Reynolds number stable -//#define FP16S // compress LBM DDFs to range-shifted IEEE-754 FP16; number conversion is done in hardware; all arithmetic is still done in FP32 -//#define FP16C // compress LBM DDFs to more accurate custom FP16C format; number conversion is emulated in software; all arithmetic is still done in FP32 - //#define WINDOWS_GRAPHICS // enable interactive graphics in Windows; start/pause the simulation by pressing P //#define CONSOLE_GRAPHICS // enable interactive graphics in the console; start/pause the simulation by pressing P //#define GRAPHICS // run FluidX3D in the console, but still enable graphics functionality for writing rendered frames to the hard drive -#define BENCHMARK // disable all extensions and setups and run benchmark setup instead - -#define GRAPGICS_FRAME_WIDTH 15360 // set frame width if only GRAPHICS is enabled -#define GRAPGICS_FRAME_HEIGHT 8640 // set frame height if only GRAPHICS is enabled +#define GRAPHICS_FRAME_WIDTH 3840 // set frame width if only GRAPHICS is enabled +#define GRAPHICS_FRAME_HEIGHT 2160 // set frame height if only GRAPHICS is enabled #define GRAPHICS_BACKGROUND_COLOR 0x000000 // set background color; black background (default) = 0x000000, white background = 0xFFFFFF #define GRAPHICS_U_MAX 0.15f // maximum velocity for velocity coloring in units of LBM lattice speed of sound (c=1/sqrt(3)) (default: 0.15f) #define GRAPHICS_Q_CRITERION 0.0001f // Q-criterion value for Q-criterion isosurface visualization (default: 0.0001f) diff --git a/src/graphics.cpp b/src/graphics.cpp index 70a0b716..a7322602 100644 --- a/src/graphics.cpp +++ b/src/graphics.cpp @@ -901,8 +901,8 @@ void draw_label(const Color& c, const string& s, const int x, const int y) {} int main(int argc, char* argv[]) { main_arguments = get_main_arguments(argc, argv); camera.fps_limit = 60u; // find out screen refresh rate - camera.width = GRAPGICS_FRAME_WIDTH; // must be divisible by 8 - camera.height = GRAPGICS_FRAME_HEIGHT; // must be divisible by 8 + camera.width = GRAPHICS_FRAME_WIDTH; // must be divisible by 8 + camera.height = GRAPHICS_FRAME_HEIGHT; // must be divisible by 8 camera.fov = 100.0f; set_zoom(1.0f); // set initial zoom camera.update_matrix(); diff --git a/src/info.cpp b/src/info.cpp index 8df0cc4a..855f9e6d 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -119,6 +119,16 @@ void Info::print_update() const { (steps==max_ulong ? alignr(17, lbm->get_t()) : alignr(12, lbm->get_t())+" "+print_percentage((double)(lbm->get_t()-steps_last)/(double)steps))+" | "+ // current step alignr(19, print_time(time()))+" |" // either elapsed time or remaining time ); +#ifdef GRAPHICS + if(key_H) { // print camera settings + const string camera_position = "float3("+to_string(camera.pos.x/(float)lbm->get_Nx(), 6u)+"f*(float)Nx, "+to_string(camera.pos.y/(float)lbm->get_Ny(), 6u)+"f*(float)Ny, "+to_string(camera.pos.z/(float)lbm->get_Nz(), 6u)+"f*(float)Nz)"; + const string camera_rx_ry_fov = to_string(degrees(camera.rx)-90.0f, 1u)+"f, "+to_string(180.0f-degrees(camera.ry), 1u)+"f, "+to_string(camera.fov, 1u)+"f"; + const string camera_zoom = to_string(camera.zoom*(float)fmax(fmax(lbm->get_Nx(), lbm->get_Ny()), lbm->get_Nz())/(float)min(camera.width, camera.height), 6u)+"f"; + if(camera.free) print_info("lbm.graphics.set_camera_free("+camera_position+", "+camera_rx_ry_fov+");"); + else print_info("lbm.graphics.set_camera_centered("+camera_rx_ry_fov+", "+camera_zoom+");"); + key_H = false; + } +#endif // GRAPHICS } void Info::print_finalize() { allow_rendering = false; diff --git a/src/kernel.cpp b/src/kernel.cpp index 9cbce9d7..096a3145 100644 --- a/src/kernel.cpp +++ b/src/kernel.cpp @@ -471,9 +471,9 @@ string opencl_c_container() { return R( // ########################## begin of O cube *= 15u; uint i; // number of triangle vertices for(i=0u; i<15u&&triangle_table(cube+i)!=15u; i+=3u) { // create the triangles - triangles[i+2u] = vertex[triangle_table(cube+i+2u)]; - triangles[i+1u] = vertex[triangle_table(cube+i+1u)]; triangles[i ] = vertex[triangle_table(cube+i )]; + triangles[i+1u] = vertex[triangle_table(cube+i+1u)]; + triangles[i+2u] = vertex[triangle_table(cube+i+2u)]; } return i/3u; // return number of triangles } @@ -1864,6 +1864,52 @@ string opencl_c_container() { return R( // ########################## begin of O +)+R(kernel void voxelize_mesh(global uchar* flags, const uchar flag, const global float* p0, const global float* p1, const global float* p2, const uint triangle_number, float x0, float y0, float z0, float x1, float y1, float z1) { // voxelize triangle mesh + const uint n = get_global_id(0); // n = x+(y+z*Ny)*Nx + const float3 p = position(coordinates(n))+(float3)(0.5f*(float)def_Nx-0.5f, 0.5f*(float)def_Ny-0.5f, 0.5f*(float)def_Nz-0.5f); + const bool condition = p.xx1||p.y>y1||p.z>z1; + volatile local uint workgroup_condition; + workgroup_condition = 1u; + barrier(CLK_LOCAL_MEM_FENCE); + atomic_and(&workgroup_condition, (uint)condition); + barrier(CLK_LOCAL_MEM_FENCE); + const bool workgroup_all = (bool)workgroup_condition; + if(workgroup_all) return; // return straight away if grid point is outside the bounds of the mesh (~4x faster) + const float3 r0_origin = p; + const float3 r1_origin = p; + const float3 r0_direction = (float3)(+0.01f, +0.04f, +1.03f); // from each grid point, shoot an outward ray and count how often it intersects the mesh, odd number -> grid point is inside mesh + const float3 r1_direction = (float3)(-0.05f, -0.06f, -1.07f); // to eliminate errors, repeat with a second ray in a different random direction + uint intersections_0=0u, intersections_1=0u; + const uint lid = get_local_id(0); + local float3 cache_p0[def_workgroup_size]; // use local memory (~25% faster) + local float3 cache_p1[def_workgroup_size]; + local float3 cache_p2[def_workgroup_size]; + for(uint i=0u; i=0.0f&&s<=1.0f&&t>=0.0f&&s+t<=1.0f&&f*dot(v, q)>0.0f); + } { + const float3 w=r1_origin-p0i, h=cross(r1_direction, v), q=cross(w, u); + const float f=1.0f/dot(u, h), s=f*dot(w, h), t=f*dot(r1_direction, q); + intersections_1 += (uint)(s>=0.0f&&s<=1.0f&&t>=0.0f&&s+t<=1.0f&&f*dot(v, q)>0.0f); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + if(intersections_0%2u&&intersections_1%2u) flags[n] = flag; +} // voxelize_mesh() + + + // ################################################## graphics code ################################################## )+"#ifdef GRAPHICS"+R( diff --git a/src/lbm.cpp b/src/lbm.cpp index e1377fb6..b5810686 100644 --- a/src/lbm.cpp +++ b/src/lbm.cpp @@ -22,7 +22,8 @@ LBM::LBM(const uint Nx, const uint Ny, const uint Nz, const float nu, const floa #endif // GRAPHICS int select_device = -1; if((int)main_arguments.size()>0) select_device = to_int(main_arguments[0]); - Device device(select_device<0 ? select_device_with_most_flops() : select_device_with_id((uint)select_device), opencl_c_code); + const vector& devices = get_devices(); + this->device = Device(select_device<0 ? select_device_with_most_flops(devices) : select_device_with_id((uint)select_device, devices), opencl_c_code); allocate(device); // lbm first #ifdef GRAPHICS graphics.allocate(device); // graphics after lbm @@ -383,33 +384,60 @@ void LBM::T_write_device_to_vtk(const string& path) { } #endif // TEMPERATURE +void LBM::voxelize_mesh(const Mesh* mesh, const uchar flag) { // voxelize triangle mesh + print_info("Voxelizing mesh. This may take a few minutes."); + Memory p0(device, mesh->triangle_number, 1u, mesh->p0); + Memory p1(device, mesh->triangle_number, 1u, mesh->p1); + Memory p2(device, mesh->triangle_number, 1u, mesh->p2); + const float x0=mesh->pmin.x, y0=mesh->pmin.y, z0=mesh->pmin.z, x1=mesh->pmax.x, y1=mesh->pmax.y, z1=mesh->pmax.z; // use bounding box of mesh to speed up voxelization + Kernel kernel_voxelize_mesh(device, get_N(), "voxelize_mesh", flags, flag, p0, p1, p2, mesh->triangle_number, x0, y0, z0, x1, y1, z1); + p0.write_to_device(); + p1.write_to_device(); + p2.write_to_device(); + flags.write_to_device(); + kernel_voxelize_mesh.run(); + flags.read_from_device(); +} +void LBM::voxelize_stl(const string& path, const float3& center, const float3x3& rotation, const float size, const uchar flag) { // voxelize triangle mesh + const Mesh* mesh = read_stl(path, float3(get_Nx(), get_Ny(), get_Nz()), center, rotation, size); + voxelize_mesh(mesh, flag); + delete mesh; +} +void LBM::voxelize_stl(const string& path, const float3x3& rotation, const float size, const uchar flag) { // read and voxelize binary .stl file (place in box center) + voxelize_stl(path, center(), rotation, size, flag); +} +void LBM::voxelize_stl(const string& path, const float3& center, const float size, const uchar flag) { // read and voxelize binary .stl file (no rotation) + voxelize_stl(path, center, float3x3(1.0f), size, flag); +} +void LBM::voxelize_stl(const string& path, const float size, const uchar flag) { // read and voxelize binary .stl file (place in box center, no rotation) + voxelize_stl(path, center(), float3x3(1.0f), size, flag); +} + string LBM::device_defines() const { return "\n #define def_Nx "+to_string(Nx)+"u" "\n #define def_Ny "+to_string(Ny)+"u" "\n #define def_Nz "+to_string(Nz)+"u" "\n #define def_N " +to_string(get_N())+"ul" - "\n #define def_velocity_set "+to_string(velocity_set)+"u" // D2Q9/D3Q15/D3Q19/D3Q27 - "\n #define def_c 0.57735027f" // lattice speed of sound c = 1/sqrt(3)*dt - "\n #define def_w " +to_string(1.0f/(3.0f*nu+0.5f))+"f" // relaxation rate w = dt/tau = dt/(nu/c^2+dt/2) = 1/(3*nu+1/2) + "\n #define D"+to_string(dimensions)+"Q"+to_string(velocity_set)+"" // D2Q9/D3Q15/D3Q19/D3Q27 + "\n #define def_velocity_set "+to_string(velocity_set)+"u" // LBM velocity set (D2Q9/D3Q15/D3Q19/D3Q27) + "\n #define def_dimensions "+to_string(dimensions)+"u" // number spatial dimensions (2D or 3D) + "\n #define def_c 0.57735027f" // lattice speed of sound c = 1/sqrt(3)*dt + "\n #define def_w " +to_string(1.0f/get_tau())+"f" // relaxation rate w = dt/tau = dt/(nu/c^2+dt/2) = 1/(3*nu+1/2) #if defined(D2Q9) - "\n #define D2Q9" "\n #define def_w0 (1.0f/2.25f)" // center (0) "\n #define def_ws (1.0f/9.0f)" // straight (1-4) "\n #define def_we (1.0f/36.0f)" // edge (5-8) #elif defined(D3Q15) - "\n #define D3Q15" "\n #define def_w0 (1.0f/4.5f)" // center (0) "\n #define def_ws (1.0f/9.0f)" // straight (1-6) "\n #define def_wc (1.0f/72.0f)" // corner (7-14) #elif defined(D3Q19) - "\n #define D3Q19" "\n #define def_w0 (1.0f/3.0f)" // center (0) "\n #define def_ws (1.0f/18.0f)" // straight (1-6) "\n #define def_we (1.0f/36.0f)" // edge (7-18) #elif defined(D3Q27) - "\n #define D3Q27" "\n #define def_w0 (1.0f/3.375f)" // center (0) "\n #define def_ws (1.0f/13.5f)" // straight (1-6) "\n #define def_we (1.0f/54.0f)" // edge (7-18) @@ -479,9 +507,9 @@ string LBM::device_defines() const { return #ifdef TEMPERATURE "\n #define TEMPERATURE" - "\n #define def_T_avg "+to_string(T_avg)+"f" // average temperature - "\n #define def_beta "+to_string(beta)+"f" // thermal expansion coefficient "\n #define def_w_T "+to_string(1.0f/(2.0f*alpha+0.5f))+"f" // wT = dt/tauT = 1/(2*alpha+1/2), alpha = thermal diffusion coefficient + "\n #define def_beta "+to_string(beta)+"f" // thermal expansion coefficient + "\n #define def_T_avg "+to_string(T_avg)+"f" // average temperature #endif // TEMPERATURE #ifdef SUBGRID diff --git a/src/lbm.hpp b/src/lbm.hpp index 3f67c71e..9d037e94 100644 --- a/src/lbm.hpp +++ b/src/lbm.hpp @@ -6,6 +6,7 @@ class LBM { private: + Device device; // OpenCL device associated with this LBM object Kernel kernel_initialize; // initialization kernel Kernel kernel_stream_collide; // main LBM kernel Kernel kernel_update_fields; // reads DDFs and updates (rho, u, T) in device memory @@ -14,12 +15,16 @@ class LBM { #if defined(D2Q9) const uint velocity_set = 9u; + const uint dimensions = 2u; #elif defined(D3Q15) const uint velocity_set = 15u; + const uint dimensions = 3u; #elif defined(D3Q19) const uint velocity_set = 19u; + const uint dimensions = 3u; #elif defined(D3Q27) const uint velocity_set = 27u; + const uint dimensions = 3u; #endif // D3Q27 #ifdef FORCE_FIELD @@ -116,6 +121,9 @@ class LBM { coordinates(n, x, y, z); return position(x, y, z); } + float3 size() const { // returns size of box + return float3((float)Nx, (float)Ny, (float)Nz); + } float3 center() const { // returns center of box return float3(0.5f*(float)Nx-0.5f, 0.5f*(float)Ny-0.5f, 0.5f*(float)Nz-0.5f); } @@ -170,6 +178,12 @@ class LBM { void T_write_device_to_vtk(const string& path=""); // write binary .vtk file #endif // TEMPERATURE + void voxelize_mesh(const Mesh* mesh, const uchar flag=TYPE_S); // voxelize mesh + void voxelize_stl(const string& path, const float3& center, const float3x3& rotation, const float size=-1.0f, const uchar flag=TYPE_S); // read and voxelize binary .stl file + void voxelize_stl(const string& path, const float3x3& rotation, const float size=-1.0f, const uchar flag=TYPE_S); // read and voxelize binary .stl file (place in box center) + void voxelize_stl(const string& path, const float3& center, const float size=-1.0f, const uchar flag=TYPE_S); // read and voxelize binary .stl file (no rotation) + void voxelize_stl(const string& path, const float size=-1.0f, const uchar flag=TYPE_S); // read and voxelize binary .stl file (place in box center, no rotation) + #ifdef GRAPHICS class Graphics { private: diff --git a/src/setup.cpp b/src/setup.cpp index cc98f3ef..a65764e2 100644 --- a/src/setup.cpp +++ b/src/setup.cpp @@ -47,7 +47,7 @@ /*void main_setup() { // Poiseuille flow validation // ######################################################### define simulation box size, viscosity and volume force ############################################################################ - const uint R = 63; // channel radius (default: 63) + const uint R = 63u; // channel radius (default: 63) const float umax = 0.1f; // maximum velocity in channel center (must be < 0.57735027f) const float tau = 1.0f; // relaxation time (must be > 0.5f), tau = nu*3+0.5 const float nu = units.nu_from_tau(tau); // nu = (tau-0.5)/3 @@ -186,8 +186,8 @@ const float3 p2 = offset+float3(+20*(int)L/64, 90*(int)L/64, -10*(int)L/64); const uint N=lbm.get_N(), Nx=lbm.get_Nx(), Ny=lbm.get_Ny(), Nz=lbm.get_Nz(); for(uint n=0u, x=0u, y=0u, z=0u; nget_N(); n++) lbm->flags[n] &= ~TYPE_S; // clear flags const float3x3 rotation = float3x3(float3(0.2f, 1.0f, 0.1f), radians(0.4032f)); // create rotation matrix to rotate mesh mesh->rotate(rotation); // rotate mesh - voxelize_mesh(*lbm, mesh, TYPE_S); // voxelize rotated mesh in lbm.flags + voxelize_mesh_hull(*lbm, mesh, TYPE_S); // voxelize rotated mesh in lbm.flags revoxelizing = false; // indicate new voxelizer thread has finished } void main_setup() { // Star Wars TIE fighter // ######################################################### define simulation box size, viscosity and volume force ############################################################################ - const uint L = 852u; + const uint L = 256u; const float Re = 100000.0f; const float u = 0.125f; - LBM lbm(L, 2u*L, L, units.nu_from_Re(Re, (float)L, u)); + LBM lbm(L, L*2u, L, units.nu_from_Re(Re, (float)L, u)); // ############################################################################################################################################################################################# const float size = 0.65f*(float)L; - const float3 center = float3(lbm.center().x, 32.0f+0.5f*size, lbm.center().z); + const float3 center = float3(lbm.center().x, 0.6f*size, lbm.center().z); const float3x3 rotation = float3x3(float3(1, 0, 0), radians(90.0f)); - Mesh* mesh = read_stl(lbm, get_exe_path()+"../stl/TIE-fighter.stl", center, rotation, size); // https://www.thingiverse.com/thing:2919109/files - voxelize_mesh(lbm, mesh, TYPE_S); + Mesh* mesh = read_stl(get_exe_path()+"../stl/TIE-fighter.stl", lbm.size(), center, rotation, size); // https://www.thingiverse.com/thing:2919109/files + voxelize_mesh_hull(lbm, mesh, TYPE_S); const uint N=lbm.get_N(), Nx=lbm.get_Nx(), Ny=lbm.get_Ny(), Nz=lbm.get_Nz(); for(uint n=0u, x=0u, y=0u, z=0u; nH+R) { // make drops that hit the simulation box ceiling disappear lbm.rho[n] = 0.5f; lbm.flags[n] = TYPE_E; } @@ -673,21 +693,27 @@ void main_setup() { // Star Wars TIE fighter lbm.run(0u); //key_1 = false; // turn off boundary //key_6 = true; // turn on surface raytracing - //lbm.graphics.set_camera_centered(-30.0f, 20.0f, 100.0f, 1.0f); //Clock clock; // image //lbm.run(units.t(0.0015f)); //print_info("compute time: "+print_time(clock.stop())); //clock.start(); + //lbm.graphics.set_camera_centered(-30.0f, 20.0f, 100.0f, 1.0f); //lbm.graphics.write_frame_png(); //print_info("render time: "+to_string(clock.stop(), 3u)); // video - //lbm.graphics.write_frame_png(); - //while(units.si_t(lbm.get_t())<0.0015f) { - // lbm.run(units.t(0.0015f)/300u); - // lbm.graphics.write_frame_png(); + //while(units.si_t(lbm.get_t())<=0.003f) { + // lbm.graphics.set_camera_centered(-30.0f, 20.0f, 100.0f, 1.0f); + // lbm.graphics.write_frame_png(get_exe_path()+"export/new/"); + // lbm.graphics.set_camera_centered(10.0f, 40.0f, 100.0f, 1.0f); + // lbm.graphics.write_frame_png(get_exe_path()+"export/p/"); + // lbm.graphics.set_camera_centered(0.0f, 0.0f, 45.0f, 1.0f); + // lbm.graphics.write_frame_png(get_exe_path()+"export/o/"); + // lbm.graphics.set_camera_centered(0.0f, 90.0f, 45.0f, 1.0f); + // lbm.graphics.write_frame_png(get_exe_path()+"export/t/"); + // lbm.run(units.t(0.004f/600u)); //} //print_info("compute + render time: "+to_string(clock.stop(), 3u)); diff --git a/src/shapes.cpp b/src/shapes.cpp index e5fa2f7e..b928dceb 100644 --- a/src/shapes.cpp +++ b/src/shapes.cpp @@ -184,81 +184,20 @@ void voxelize_triangle(LBM& lbm, const float3& p0, const float3& p1, const float for(float i=0.0f; i0u&&filesize==84u+50u*triangle_number) print_info("Loading \""+filename+"\" with "+to_string(triangle_number)+" triangles."); - else print_error("File \""+filename+"\" is corrupt or unsupported! Only binary .stl files are supported."); - Mesh* mesh = new Mesh(triangle_number, center); - mesh->p0[0] = float3(0.0f); // to fix warning C6001 - for(uint i=0u; ip0[i] = rotation*float3(triangle_data[ 3], triangle_data[ 4], triangle_data[ 5]); // read positions of triangle vertices and rotate them - mesh->p1[i] = rotation*float3(triangle_data[ 6], triangle_data[ 7], triangle_data[ 8]); - mesh->p2[i] = rotation*float3(triangle_data[ 9], triangle_data[10], triangle_data[11]); - } - delete[] data; - float3 pmin=mesh->p0[0], pmax=pmin; // auto-rescale mesh - for(uint i=0u; ip0[i], p1=mesh->p1[i], p2=mesh->p2[i]; - pmin.x = fmin(fmin(fmin(p0.x, p1.x), p2.x), pmin.x); - pmin.y = fmin(fmin(fmin(p0.y, p1.y), p2.y), pmin.y); - pmin.z = fmin(fmin(fmin(p0.z, p1.z), p2.z), pmin.z); - pmax.x = fmax(fmax(fmax(p0.x, p1.x), p2.x), pmax.x); - pmax.y = fmax(fmax(fmax(p0.y, p1.y), p2.y), pmax.y); - pmax.z = fmax(fmax(fmax(p0.z, p1.z), p2.z), pmax.z); - } - const float3 offset = -0.5f*(pmin+pmax); - float scale = 1.0f; - if(size>0) { // rescale to specified size - scale = size/fmax(fmax(pmax.x-pmin.x, pmax.y-pmin.y), pmax.z-pmin.z); - } else { // auto-rescale to largest possible size - const float scale_x = (float)lbm.get_Nx()/(pmax.x-pmin.x); - const float scale_y = (float)lbm.get_Ny()/(pmax.y-pmin.y); - const float scale_z = (float)lbm.get_Nz()/(pmax.z-pmin.z); - scale = fmin(fmin(scale_x, scale_y), scale_z); - } - for(uint i=0u; ip0[i] = center+scale*(offset+mesh->p0[i]); - mesh->p1[i] = center+scale*(offset+mesh->p1[i]); - mesh->p2[i] = center+scale*(offset+mesh->p2[i]); - } - return mesh; -} -Mesh* read_stl(LBM& lbm, const string& path, const float3x3& rotation, const float size) { // read binary .stl file (place in box center) - return read_stl(lbm, path, lbm.center(), rotation, size); -} -Mesh* read_stl(LBM& lbm, const string& path, const float3& center, const float size) { // read binary .stl file (no rotation) - return read_stl(lbm, path, center, float3x3(1.0f), size); -} -Mesh* read_stl(LBM& lbm, const string& path, const float size) { // read binary .stl file (place in box center, no rotation) - return read_stl(lbm, path, lbm.center(), float3x3(1.0f), size); -} -void voxelize_mesh(LBM& lbm, const Mesh* mesh, const uchar flag) { // voxelize mesh +void voxelize_mesh_hull(LBM& lbm, const Mesh* mesh, const uchar flag) { // voxelize mesh for(uint i=0u; itriangle_number; i++) voxelize_triangle(lbm, mesh->p0[i], mesh->p1[i], mesh->p2[i], flag); // voxelize mesh } -void voxelize_stl(LBM& lbm, const string& path, const float3& center, const float3x3& rotation, const float size, const uchar flag) { // read and voxelize binary .stl file - const Mesh* mesh = read_stl(lbm, path, center, rotation, size); - voxelize_mesh(lbm, mesh, flag); +void voxelize_stl_hull(LBM& lbm, const string& path, const float3& center, const float3x3& rotation, const float size, const uchar flag) { // read and voxelize binary .stl file + const Mesh* mesh = read_stl(path, float3(lbm.get_Nx(), lbm.get_Ny(), lbm.get_Nz()), center, rotation, size); + voxelize_mesh_hull(lbm, mesh, flag); delete mesh; } -void voxelize_stl(LBM& lbm, const string& path, const float3x3& rotation, const float size, const uchar flag) { // read and voxelize binary .stl file (place in box center) - voxelize_stl(lbm, path, lbm.center(), rotation, size, flag); +void voxelize_stl_hull(LBM& lbm, const string& path, const float3x3& rotation, const float size, const uchar flag) { // read and voxelize binary .stl file (place in box center) + voxelize_stl_hull(lbm, path, lbm.center(), rotation, size, flag); } -void voxelize_stl(LBM& lbm, const string& path, const float3& center, const float size, const uchar flag) { // read and voxelize binary .stl file (no rotation) - voxelize_stl(lbm, path, center, float3x3(1.0f), size, flag); +void voxelize_stl_hull(LBM& lbm, const string& path, const float3& center, const float size, const uchar flag) { // read and voxelize binary .stl file (no rotation) + voxelize_stl_hull(lbm, path, center, float3x3(1.0f), size, flag); } -void voxelize_stl(LBM& lbm, const string& path, const float size, const uchar flag) { // read and voxelize binary .stl file (place in box center, no rotation) - voxelize_stl(lbm, path, lbm.center(), float3x3(1.0f), size, flag); +void voxelize_stl_hull(LBM& lbm, const string& path, const float size, const uchar flag) { // read and voxelize binary .stl file (place in box center, no rotation) + voxelize_stl_hull(lbm, path, lbm.center(), float3x3(1.0f), size, flag); } \ No newline at end of file diff --git a/src/shapes.hpp b/src/shapes.hpp index 74d72973..51dfdc9e 100644 --- a/src/shapes.hpp +++ b/src/shapes.hpp @@ -27,47 +27,9 @@ float plane_plic(const uint x, const uint y, const uint z, const float3& p, cons class LBM; // forward-declare LBM class void voxelize_line(LBM& lbm, const float3& p0, const float3& p1, const uchar flag); // voxelize line void voxelize_triangle(LBM& lbm, const float3& p0, const float3& p1, const float3& p2, const uchar flag); // voxelize triangle +void voxelize_mesh_hull(LBM& lbm, const Mesh* mesh, const uchar flag); // voxelize mesh -struct Mesh { // triangle mesh - uint triangle_number = 0u; - float3 center; - float3* p0; - float3* p1; - float3* p2; - Mesh(const uint triangle_number, const float3& center) { - this->triangle_number = triangle_number; - this->center = center; - this->p0 = new float3[triangle_number]; - this->p1 = new float3[triangle_number]; - this->p2 = new float3[triangle_number]; - } - ~Mesh() { - delete[] p0; - delete[] p1; - delete[] p2; - } - void scale(const float scale) { - for(uint i=0u; itriangle_number = triangle_number; + this->center = this->pmin = this->pmax = center; + this->p0 = new float3[triangle_number]; + this->p1 = new float3[triangle_number]; + this->p2 = new float3[triangle_number]; + } + inline ~Mesh() { + delete[] p0; + delete[] p1; + delete[] p2; + } + inline void find_bounds() { + pmin = pmax = p0[0]; + for(uint i=1u; i0u&&filesize==84u+50u*triangle_number) println("Loading \""+filename+"\" with "+to_string(triangle_number)+" triangles."); + else println("\rError: File \""+filename+"\" is corrupt or unsupported! Only binary .stl files are supported."); + Mesh* mesh = new Mesh(triangle_number, center); + mesh->p0[0] = float3(0.0f); // to fix warning C6001 + for(uint i=0u; ip0[i] = rotation*float3(triangle_data[ 3], triangle_data[ 4], triangle_data[ 5]); // read positions of triangle vertices and rotate them + mesh->p1[i] = rotation*float3(triangle_data[ 6], triangle_data[ 7], triangle_data[ 8]); + mesh->p2[i] = rotation*float3(triangle_data[ 9], triangle_data[10], triangle_data[11]); + } + delete[] data; + mesh->find_bounds(); + const float3 offset = -0.5f*(mesh->pmin+mesh->pmax); // auto-rescale mesh + float scale = 1.0f; + if(size>0) { // rescale to specified size + scale = size/fmax(fmax(mesh->pmax.x-mesh->pmin.x, mesh->pmax.y-mesh->pmin.y), mesh->pmax.z-mesh->pmin.z); + } else { // auto-rescale to largest possible size + const float scale_x = box_size.x/(mesh->pmax.x-mesh->pmin.x); + const float scale_y = box_size.y/(mesh->pmax.y-mesh->pmin.y); + const float scale_z = box_size.z/(mesh->pmax.z-mesh->pmin.z); + scale = fmin(fmin(scale_x, scale_y), scale_z); + } + for(uint i=0u; ip0[i] = center+scale*(offset+mesh->p0[i]); + mesh->p1[i] = center+scale*(offset+mesh->p1[i]); + mesh->p2[i] = center+scale*(offset+mesh->p2[i]); + } + mesh->find_bounds(); + return mesh; +} +inline Mesh* read_stl(const string& path, const float3& box_size, const float3& center, const float size) { // read binary .stl file (no rotation) + return read_stl(path, box_size, center, float3x3(1.0f), size); +} + class Configuration_File { private: string filename;