From 91be087b07d87db4ac0d4433f409034e3cb5861b Mon Sep 17 00:00:00 2001
From: Myeong-geun Shin
Date: Thu, 10 Aug 2023 14:36:23 +0900
Subject: [PATCH 01/18] nits: adjust color themes & add logo, favicon

---
 docs/assets/favicon.png    | Bin 0 -> 4099 bytes
 docs/assets/logo.png       | Bin 0 -> 8147 bytes
 docs/stylesheets/extra.css | 4 ++++
 mkdocs.yml                 | 7 +++++++
 4 files changed, 11 insertions(+)
 create mode 100644 docs/assets/favicon.png
 create mode 100644 docs/assets/logo.png
 create mode 100644 docs/stylesheets/extra.css

diff --git a/docs/assets/favicon.png b/docs/assets/favicon.png
new file mode 100644
index 0000000000000000000000000000000000000000..0464992ea020309f9ad9c9737efe7de4b12d5c8f
GIT binary patch
literal 4099
[base85-encoded binary image data omitted]

diff --git a/docs/assets/logo.png b/docs/assets/logo.png
new file mode 100644
GIT binary patch
literal 8147
[base85-encoded binary image data omitted]

diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css
new file mode 100644
index 00000000..1c1fde73
--- /dev/null
+++ b/docs/stylesheets/extra.css
@@ -0,0 +1,4 @@
+:root {
+    --md-primary-fg-color: #780006;
+    --md-accent-fg-color: #e21500;
+}
\ No newline at end of file

[hunk for mkdocs.yml (7 insertions) not recovered]

Date: Mon, 21 Aug 2023 11:41:21 +0900
Subject: [PATCH 02/18] docs: add mkdocs-jupyter

---
 docs/requirements.txt |  1 +
 docs/user_guide.ipynb | 82 +++++++++++++++++++++++++++++++++++++++++++
 mkdocs.yml            |  3 ++
 3 files changed, 86 insertions(+)
 create mode 100644 docs/user_guide.ipynb

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 9314de17..c7480938
100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,2 +1,3 @@
 mkdocs-material
 mkdocstrings[python]
+mkdocs-jupyter

diff --git a/docs/user_guide.ipynb b/docs/user_guide.ipynb
new file mode 100644
index 00000000..08ea8752
--- /dev/null
+++ b/docs/user_guide.ipynb
@@ -0,0 +1,82 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Furiosa Model Zoo User Guide\n",
+    "Model Zoo is a project that gathers models that can be accelerated with the FuriosaAI NPU so that they are easy to use.\n",
+    "First, let's take a look at the available models."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['EfficientNetB0', 'EfficientNetV2s', 'ResNet50', 'SSDMobileNet', 'SSDResNet34', 'YOLOv5l', 'YOLOv5m']\n"
+     ]
+    }
+   ],
+   "source": [
+    "from furiosa.models import vision\n",
+    "\n",
+    "\n",
+    "print(dir(vision))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+-----------------+------------------------------+----------------------+-------------------------+\n",
+      "| Model name      | Model description            | Task type            | Available postprocesses |\n",
+      "+-----------------+------------------------------+----------------------+-------------------------+\n",
+      "| ResNet50        | MLCommons ResNet50 model     | Image Classification | Python                  |\n",
+      "| SSDMobileNet    | MLCommons MobileNet v1 model | Object Detection     | Python, Rust            |\n",
+      "| SSDResNet34     | MLCommons SSD ResNet34 model | Object Detection     | Python, Rust            |\n",
+      "| YOLOv5l         | YOLOv5 Large model           | Object Detection     | Rust                    |\n",
+      "| YOLOv5m         | YOLOv5 Medium model          | Object Detection     | Rust                    |\n",
+      "| EfficientNetB0  | EfficientNet B0 model        | Image Classification | Python                  |\n",
+      "| EfficientNetV2s | EfficientNetV2-s model       | Image Classification | Python                  |\n",
+      "+-----------------+------------------------------+----------------------+-------------------------+\n"
+     ]
+    }
+   ],
+   "source": [
+    "!furiosa-models list"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "models",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.17"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

diff --git a/mkdocs.yml b/mkdocs.yml
index cab36519..3d5dee06 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -20,6 +20,7 @@ repo_url: https://github.com/furiosa-ai/furiosa-models
 nav:
 - Overview: index.md
 - getting_started.md
+- user_guide.ipynb
 - model_object.md
 - command_tool.md
 - Models:
@@ -50,6 +51,8 @@ markdown_extensions:

 plugins:
 - search
+- mkdocs-jupyter:
+    ignore: ["examples/*.py"]
 - mkdocstrings:
     default_handler: python
     handlers:

From a68f1e8817626b6a4478882891f97122e0d570e2 Mon Sep 17 00:00:00 2001
From: Myeong-geun Shin
Date: Tue, 22 Aug 2023 11:01:14 +0900
Subject: [PATCH 03/18] ci: test docs notebooks too

---
 Makefile                   | 7 +++++--
 docker/Dockerfile          | 3 +--
 docs/stylesheets/extra.css | 1 +
 pyproject.toml             | 1 +
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 81003e70..cb6e9b7c 100644
--- a/Makefile
+++ b/Makefile
@@ -4,8 +4,8 @@ ONNXRUNTIME_VERSION := 1.15.1-?
 TOOLCHAIN_VERSION := 0.10.0-?
 LIBHAL_VERSION := 0.11.0-?
-.PHONY: check-docker-tag toolchain lint test unit_tests examples regression-test-all \
-regression-test-resnet50 regression-test-ssd-mobilenet \
+.PHONY: check-docker-tag toolchain lint test unit_tests notebook_tests examples \
+regression-test-all regression-test-resnet50 regression-test-ssd-mobilenet \
 regression-test-ssd-resnet34 regression-test-yolov5 doc docker-build docker-push \
 regression-test-efficientnet-b0 regression-test-efficientnet-v2-s
@@ -33,6 +33,9 @@ test:
 unit_tests:
 	pytest ./tests/unit/ -s

+notebook_tests:
+	pytest --nbmake ./docs
+
 examples:
 	for f in $$(find docs/examples/ -name *.py); do printf "\n[TEST] $$f ...\n"; python3 $$f || exit 1; done

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 7a0ad2df..ba0d1a83 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -14,8 +14,7 @@ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
   | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
   && apt update && apt install gh -y

-RUN pip3 install --upgrade pip wheel setuptools Cython pytest pycocotools \
-    dvc[s3] pytest-benchmark
+RUN pip3 install --upgrade pip wheel setuptools Cython dvc[s3]

 RUN echo "deb [arch=amd64] https://internal-archive.furiosa.dev/ubuntu focal restricted" \
     > /etc/apt/sources.list.d/furiosa.list && \

diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css
index 1c1fde73..dc5e1d63 100644
--- a/docs/stylesheets/extra.css
+++ b/docs/stylesheets/extra.css
@@ -1,4 +1,5 @@
 :root {
     --md-primary-fg-color: #780006;
+    --md-primary-fg-color--dark: #460101;
     --md-accent-fg-color: #e21500;
 }
\ No newline at end of file

diff --git a/pyproject.toml b/pyproject.toml
index 5bc8b159..8c78bb48 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,6 +47,7 @@ test = [
     "pytest",
     "pytest-benchmark == 4.0.0",
     "pytest-asyncio ~= 0.17.2",
+    "nbmake",
     "pycocotools ~= 2.0.4",
 ]
 dvc = ["dvc[s3]"]

From 4d2b306a1db3b7c963909920202e9395a9304f2d Mon Sep 17 00:00:00 2001
From: "hyeonu.park"
Date: Mon, 14 Aug 2023 19:33:05 +0900
Subject: [PATCH 04/18] fix: replace `Model.load[_async]()`, `use_native`,
 `.source` in docs

Signed-off-by: Myeong-geun Shin
---
 docs/getting_started.md      |  2 +-
 docs/model_object.md         | 11 ++++++-----
 docs/models/ssd_mobilenet.md |  4 ++--
 docs/models/ssd_resnet34.md  |  4 ++--
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/docs/getting_started.md b/docs/getting_started.md
index c9194b4d..35b49003 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -28,7 +28,7 @@ pip install 'furiosa-models'
 ```
 git clone https://github.com/furiosa-ai/furiosa-models
-pip install .
+pip install ./furiosa-models
 ```

 ## Quick example and Guides

diff --git a/docs/model_object.md b/docs/model_object.md
index d9afed08..ba33050a 100644
--- a/docs/model_object.md
+++ b/docs/model_object.md
@@ -4,13 +4,13 @@ In `furiosa-models` project, `Model` is the first class object, and it represent
 This document explains what [`Model`][furiosa.models.types.Model] object offers and their usages.

 ## Loading a pre-trained model
-To load a pre-trained neural-network model, you need to call `load()` method.
+To load a pre-trained neural-network model, you need to instantiate the `Model` class.
 Since the sizes of pre-trained model weights vary from tens to hundreds megabytes,
-the model images are not included in Python package. When `load()` method is called, a pre-trained model will be
+the model images are not included in the Python package. The first time a model object is instantiated, a pre-trained model will be
 fetched over the network. It takes some time (usually a few seconds) depending on models and network conditions.
 Once the model images are fetched, they will be cached on a local disk.

-=== "Blocking API"
+=== "Load model"
    ```python
    --8<-- "docs/examples/loading_model.py"
    ```
@@ -40,7 +40,7 @@ To create a session, pass the `enf` field of the model object to the furiosa.run
 * [Python SDK Installation and User Guide](https://furiosa-ai.github.io/docs/latest/en/software/python-sdk.html)
 * [Furiosa SDK - Tutorial and Code Examples](https://furiosa-ai.github.io/docs/latest/en/software/tutorials.html)

-Passing `Model.source` to `session.create()` allows users to start from source models in ONNX or tflite and customize models to their specific use-cases. This customization includes options such as specifying batch sizes and compiler configurations for optimization purposes. For additional information on Model.source, please refer to [Accessing artifacts and metadata](#accessing_artifacts_and_metadata).
+Passing `Model.origin` to `session.create()` allows users to start from source models in ONNX or tflite and customize models to their specific use-cases. This customization includes options such as specifying batch sizes and compiler configurations for optimization purposes. For additional information on Model.origin, please refer to [Accessing artifacts and metadata](#accessing_artifacts_and_metadata).

 To utilize f32 source models, it is necessary to perform calibration and quantization.
 Pre-calibrated data is readily available in Furiosa-models, facilitating direct access to the quantization process.
@@ -98,7 +98,8 @@ In sum, typical steps of a single inference is as the following, as also shown
    such as CPU cache, SIMD instructions and CPU pipelining. According to our benchmark,
    the native implementations show at most 70% lower latency.

-    To use native post processor, please pass `use_native=True` to `Model.load()` or `Model.load_async()`.
+    To use native post processor, please pass `postprocessor_type="Rust"` to `Model()`.
+
    The following is an example to use native post processor for [SSDMobileNet](models/ssd_mobilenet.md).
    You can find more details of each model page.

diff --git a/docs/models/ssd_mobilenet.md b/docs/models/ssd_mobilenet.md
index 788df486..ccef8425 100644
--- a/docs/models/ssd_mobilenet.md
+++ b/docs/models/ssd_mobilenet.md
@@ -74,8 +74,8 @@ You can find examples at [SSDMobileNet Usage](#SSDMobileNet_Usage).
 This class provides another version of the postprocessing implementation which is highly
 optimized for NPU. The implementation leverages the NPU IO architecture and runtime.

-To use this implementation, when this model is loaded, the parameter `use_native=True`
-should be passed to `load()` or `load_aync()`. The following is an example:
+To use this implementation, when this model is called, the parameter `postprocessor_type="Rust"`
+should be passed. The following is an example:

 !!! Example
    ```python

diff --git a/docs/models/ssd_resnet34.md b/docs/models/ssd_resnet34.md
index fb42da49..601a9b85 100644
--- a/docs/models/ssd_resnet34.md
+++ b/docs/models/ssd_resnet34.md
@@ -75,8 +75,8 @@ You can find examples at [SSDResNet34 Usage](#SSDResNet34_Usage).
 This class provides another version of the postprocessing implementation which is highly
 optimized for NPU. The implementation leverages the NPU IO architecture and runtime.
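For reference, the end-to-end flow these postprocessor hunks document looks roughly like the sketch below. It mirrors docs/examples/ssd_mobilenet_native.py as it stands after this patch (the string `"Rust"` for `postprocessor_type`); the exact shape of the final `postprocess` call is an assumption based on the other examples in this series.

```python
from furiosa.models.vision import SSDMobileNet
from furiosa.runtime.sync import create_runner

# Instantiate the model with the NPU-optimized (Rust) postprocessor.
mobilenet = SSDMobileNet(postprocessor_type="Rust")

with create_runner(mobilenet.model_source()) as runner:
    # preprocess() returns NPU-ready input tensors plus contexts for postprocessing.
    inputs, contexts = mobilenet.preprocess(["tests/assets/cat.jpg"])
    outputs = runner.run(inputs)
    # Assumed call shape: decode the raw NPU outputs into detection results.
    detections = mobilenet.postprocess(outputs, contexts)
    print(detections)
```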
-To use this implementation, when this model is loaded, the parameter `use_native=True` -should be passed to `load()` or `load_aync()`. The following is an example: +To use this implementation, when this model is called, the parameter `postprocessor_type="Rust"` +should be passed. The following is an example: !!! Example ```python From bbc1fcc5e147f63e151226d094a22f697cb8277e Mon Sep 17 00:00:00 2001 From: "hyeonu.park" Date: Mon, 14 Aug 2023 20:23:43 +0900 Subject: [PATCH 05/18] fix: replace `calib_range` in docs Signed-off-by: Myeong-geun Shin --- docs/examples/ssd_mobilenet_onnx.py | 4 ++-- docs/model_object.md | 9 +++------ furiosa/models/types.py | 23 ++++++++++++++--------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/examples/ssd_mobilenet_onnx.py b/docs/examples/ssd_mobilenet_onnx.py index b1244fc1..b5a80294 100644 --- a/docs/examples/ssd_mobilenet_onnx.py +++ b/docs/examples/ssd_mobilenet_onnx.py @@ -8,11 +8,11 @@ mobilenet = SSDMobileNet() onnx_model: bytes = mobilenet.origin -calib_range: dict = mobilenet.tensor_name_to_range +tensor_name_to_range: dict = mobilenet.tensor_name_to_range # See https://furiosa-ai.github.io/docs/latest/en/api/python/furiosa.quantizer.html#furiosa.quantizer.quantize # for more details -quantized_onnx = quantize(onnx_model, calib_range) +quantized_onnx = quantize(onnx_model, tensor_name_to_range) with create_runner(quantized_onnx, compiler_config=compiler_config) as runner: inputs, contexts = mobilenet.preprocess(image, with_scaling=True) diff --git a/docs/model_object.md b/docs/model_object.md index ba33050a..0decd5d1 100644 --- a/docs/model_object.md +++ b/docs/model_object.md @@ -17,7 +17,7 @@ Once the model images are fetched, they will be cached on a local disk. ## Accessing artifacts and metadata -A `Model` object includes model artifacts, such as ONNX, tflite, calibration range in yaml format, and ENF. +A `Model` object includes model artifacts, such as ONNX, tflite, mapping from a tensor name to the tensor's min and max, and ENF. ENF format is [FuriosaAI Compiler](https://furiosa-ai.github.io/docs/latest/en/software/compiler.html) specific format. Once you have the ENF file, you can reuse it to omit the compilation process that take up to minutes. @@ -27,6 +27,7 @@ In addition, a `Model` object has various metadata. The followings are all attri ::: furiosa.models.types.Model options: show_source: true + show_symbol_type_toc: true ## Inferencing with Session API @@ -45,15 +46,11 @@ Passing `Model.origin` to `session.create()` allows users to start from source m To utilize f32 source models, it is necessary to perform calibration and quantization. Pre-calibrated data is readily available in Furiosa-models, facilitating direct access to the quantization process. For manual quantization of the model, you can install the `furiosa-quantizer` package, which can be found at this [package link](https://furiosa-ai.github.io/docs/latest/en/software/python-sdk.html#quantizer). -The calib_range field of the model class represents this pre-calibrated data. +The tensor_name_to_range field of the model class represents this pre-calibrated data. After quantization, the output will be in the form of FuriosaAI's IR which can then be passed to the session. At this stage, the compiler configuration can be specified. -!!!Info - The calibration range field is actually in yaml format but serialized in string type. - To deserialize the calibration range, use `import yaml; yaml.full_load(calib_range)`. - !!! 
Example === "Using pre-compiled ENF binary" diff --git a/furiosa/models/types.py b/furiosa/models/types.py index 7eb4e450..b4772684 100644 --- a/furiosa/models/types.py +++ b/furiosa/models/types.py @@ -103,14 +103,6 @@ class Model(ABC, BaseModel): tensor_name_to_range: the calibration ranges of each tensor in origin preprocessor: a preprocessor to preprocess input tensors postprocessor: a postprocessor to postprocess output tensors - - Methods: - preprocess: preprocess input tensors - postprocess: postprocess output tensors - model_source(num_pe=[1|2]): the executable binary for furiosa runtime and NPU. It can be - directly fed to `furiosa.runtime.create_runner`. If model binary is not compiled yet, - it will be quantized & compiled automatically if possible - resolve_all: resolve all non-cached properties(origin, tensor_name_to_range, model_sources) """ model_config = ConfigDict(arbitrary_types_allowed=True) @@ -129,9 +121,13 @@ class Model(ABC, BaseModel): postprocessor: PostProcessor = Field(..., repr=False, exclude=True) def preprocess(self, *args, **kwargs) -> Tuple[Sequence[npt.ArrayLike], Sequence[Context]]: + """preprocess input tensors + """ return self.preprocessor(*args, **kwargs) def postprocess(self, *args, **kwargs): + """postprocess output tensors + """ return self.postprocessor(*args, **kwargs) @computed_field(repr=False) @@ -145,7 +141,14 @@ def tensor_name_to_range(self) -> Dict[str, List[float]]: calib_yaml = resolve_source(self._artifact_name, EXT_CALIB_YAML) return yaml.full_load(calib_yaml) - def model_source(self, num_pe: int = 2) -> bytes: + def model_source(self, num_pe: [1|2] = 2) -> bytes: + """the executable binary for furiosa runtime and NPU. It can be + directly fed to `furiosa.runtime.create_runner`. If model binary is not compiled yet, + it will be quantized & compiled automatically if possible + + Args: + num_pe: number of PE to be used. 
+ """ if num_pe not in (1, 2): raise ValueError(f"Invalid num_pe: {num_pe}") @@ -153,6 +156,8 @@ def model_source(self, num_pe: int = 2) -> bytes: return resolve_model_source(self._artifact_name, num_pe=num_pe) def resolve_all(self): + """resolve all non-cached properties(origin, tensor_name_to_range, model_sources) + """ _ = self.origin, self.tensor_name_to_range for num_pe in (1, 2): _ = self.model_source(num_pe=num_pe) From d135619e5cef67459dd0f07fba97961d2caa9052 Mon Sep 17 00:00:00 2001 From: "hyeonu.park" Date: Wed, 16 Aug 2023 17:11:24 +0900 Subject: [PATCH 06/18] apply review Signed-off-by: Myeong-geun Shin --- docs/examples/ssd_mobilenet_native.py | 3 ++- docs/examples/ssd_resnet34_native.py | 3 ++- docs/model_object.md | 2 +- docs/models/ssd_mobilenet.md | 2 +- docs/models/ssd_resnet34.md | 2 +- furiosa/models/types.py | 4 ++-- tests/bench/test_ssd_mobilenet.py | 4 ++-- tests/bench/test_ssd_resnet34.py | 4 ++-- 8 files changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/examples/ssd_mobilenet_native.py b/docs/examples/ssd_mobilenet_native.py index 4f071214..efb1b807 100644 --- a/docs/examples/ssd_mobilenet_native.py +++ b/docs/examples/ssd_mobilenet_native.py @@ -1,9 +1,10 @@ from furiosa.models.vision import SSDMobileNet +from furiosa.models.types import Platform from furiosa.runtime.sync import create_runner image = ["tests/assets/cat.jpg"] -mobilenet = SSDMobileNet(postprocessor_type="Rust") +mobilenet = SSDMobileNet(postprocessor_type=Platform.RUST) with create_runner(mobilenet.model_source()) as runner: inputs, contexts = mobilenet.preprocess(image) outputs = runner.run(inputs) diff --git a/docs/examples/ssd_resnet34_native.py b/docs/examples/ssd_resnet34_native.py index 1caf42fd..916f08c0 100644 --- a/docs/examples/ssd_resnet34_native.py +++ b/docs/examples/ssd_resnet34_native.py @@ -1,7 +1,8 @@ from furiosa.models.vision import SSDResNet34 +from furiosa.models.types import Platform from furiosa.runtime.sync import create_runner -resnet34 = SSDResNet34(postprocessor_type="Rust") +resnet34 = SSDResNet34(postprocessor_type=Platform.RUST) with create_runner(resnet34.model_source()) as runner: image, contexts = resnet34.preprocess(["tests/assets/cat.jpg"]) diff --git a/docs/model_object.md b/docs/model_object.md index 0decd5d1..7ca96f7f 100644 --- a/docs/model_object.md +++ b/docs/model_object.md @@ -95,7 +95,7 @@ In sum, typical steps of a single inference is as the following, as also shown a such as CPU cache, SIMD instructions and CPU pipelining. According to our benchmark, the native implementations show at most 70% lower latency. - To use native post processor, please pass `postprocessor_type="Rust"` to `Model()`. + To use native post processor, please pass `postprocessor_type=Platform.RUST` to `Model()`. The following is an example to use native post processor for [SSDMobileNet](models/ssd_mobilenet.md). You can find more details of each model page. diff --git a/docs/models/ssd_mobilenet.md b/docs/models/ssd_mobilenet.md index ccef8425..6b3e54f3 100644 --- a/docs/models/ssd_mobilenet.md +++ b/docs/models/ssd_mobilenet.md @@ -74,7 +74,7 @@ You can find examples at [SSDMobileNet Usage](#SSDMobileNet_Usage). This class provides another version of the postprocessing implementation which is highly optimized for NPU. The implementation leverages the NPU IO architecture and runtime. 
-To use this implementation, when this model is called, the parameter `postprocessor_type="Rust"` +To use this implementation, when this model is called, the parameter `postprocessor_type=Platform.RUST` should be passed. The following is an example: !!! Example diff --git a/docs/models/ssd_resnet34.md b/docs/models/ssd_resnet34.md index 601a9b85..3ddb9b86 100644 --- a/docs/models/ssd_resnet34.md +++ b/docs/models/ssd_resnet34.md @@ -75,7 +75,7 @@ You can find examples at [SSDResNet34 Usage](#SSDResNet34_Usage). This class provides another version of the postprocessing implementation which is highly optimized for NPU. The implementation leverages the NPU IO architecture and runtime. -To use this implementation, when this model is called, the parameter `postprocessor_type="Rust"` +To use this implementation, when this model is called, the parameter `postprocessor_type=Platform.RUST` should be passed. The following is an example: !!! Example diff --git a/furiosa/models/types.py b/furiosa/models/types.py index b4772684..e09ffde1 100644 --- a/furiosa/models/types.py +++ b/furiosa/models/types.py @@ -2,7 +2,7 @@ import datetime from enum import Enum from functools import cached_property -from typing import Any, Dict, List, Optional, Sequence, Tuple +from typing import Any, Dict, List, Optional, Sequence, Tuple, Literal import numpy.typing as npt from pydantic import BaseModel, ConfigDict, Field, computed_field, field_serializer @@ -141,7 +141,7 @@ def tensor_name_to_range(self) -> Dict[str, List[float]]: calib_yaml = resolve_source(self._artifact_name, EXT_CALIB_YAML) return yaml.full_load(calib_yaml) - def model_source(self, num_pe: [1|2] = 2) -> bytes: + def model_source(self, num_pe: Literal[1, 2] = 2) -> bytes: """the executable binary for furiosa runtime and NPU. It can be directly fed to `furiosa.runtime.create_runner`. 
If model binary is not compiled yet, it will be quantized & compiled automatically if possible diff --git a/tests/bench/test_ssd_mobilenet.py b/tests/bench/test_ssd_mobilenet.py index eb4319e2..ca10986e 100644 --- a/tests/bench/test_ssd_mobilenet.py +++ b/tests/bench/test_ssd_mobilenet.py @@ -7,7 +7,7 @@ from pycocotools.cocoeval import COCOeval import tqdm -from furiosa.models.types import Model +from furiosa.models.types import Model, Platform from furiosa.models.vision import SSDMobileNet from furiosa.runtime.sync import create_runner @@ -75,7 +75,7 @@ def workload(image_id, image): def test_mlcommons_ssd_mobilenet_with_native_rust_pp_accuracy(benchmark): - model = SSDMobileNet(postprocessor_type="Rust") + model = SSDMobileNet(postprocessor_type=Platform.RUST) image_directory, coco = load_coco_from_env_variable() image_src_iter = iter(tqdm.tqdm(coco.dataset["images"])) diff --git a/tests/bench/test_ssd_resnet34.py b/tests/bench/test_ssd_resnet34.py index ff56a014..5701f241 100644 --- a/tests/bench/test_ssd_resnet34.py +++ b/tests/bench/test_ssd_resnet34.py @@ -8,7 +8,7 @@ from pycocotools.cocoeval import COCOeval import tqdm -from furiosa.models.types import Model +from furiosa.models.types import Model, Platform from furiosa.models.vision import SSDResNet34 from furiosa.runtime.sync import create_runner @@ -83,7 +83,7 @@ def workload(image_id, image): def test_mlcommons_ssd_resnet34_with_native_rust_pp_accuracy(benchmark): - model = SSDResNet34(postprocessor_type="Rust") + model = SSDResNet34(postprocessor_type=Platform.RUST) image_directory, coco = load_coco_from_env_variable() instances_val2017 = Path( From ccdf31afd444f05b70243d2a461818bc0609c625 Mon Sep 17 00:00:00 2001 From: Myeong-geun Shin Date: Thu, 24 Aug 2023 16:43:18 +0900 Subject: [PATCH 07/18] nits: isort & run jupyter test in ci --- docs/examples/ssd_mobilenet_native.py | 2 +- docs/examples/ssd_resnet34_native.py | 2 +- furiosa/models/types.py | 11 ++++------- tekton/furiosa-models-ci/test.yaml | 3 ++- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/examples/ssd_mobilenet_native.py b/docs/examples/ssd_mobilenet_native.py index efb1b807..9352f877 100644 --- a/docs/examples/ssd_mobilenet_native.py +++ b/docs/examples/ssd_mobilenet_native.py @@ -1,5 +1,5 @@ -from furiosa.models.vision import SSDMobileNet from furiosa.models.types import Platform +from furiosa.models.vision import SSDMobileNet from furiosa.runtime.sync import create_runner image = ["tests/assets/cat.jpg"] diff --git a/docs/examples/ssd_resnet34_native.py b/docs/examples/ssd_resnet34_native.py index 916f08c0..f9bd5783 100644 --- a/docs/examples/ssd_resnet34_native.py +++ b/docs/examples/ssd_resnet34_native.py @@ -1,5 +1,5 @@ -from furiosa.models.vision import SSDResNet34 from furiosa.models.types import Platform +from furiosa.models.vision import SSDResNet34 from furiosa.runtime.sync import create_runner resnet34 = SSDResNet34(postprocessor_type=Platform.RUST) diff --git a/furiosa/models/types.py b/furiosa/models/types.py index e09ffde1..2e972a95 100644 --- a/furiosa/models/types.py +++ b/furiosa/models/types.py @@ -2,7 +2,7 @@ import datetime from enum import Enum from functools import cached_property -from typing import Any, Dict, List, Optional, Sequence, Tuple, Literal +from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple import numpy.typing as npt from pydantic import BaseModel, ConfigDict, Field, computed_field, field_serializer @@ -121,13 +121,11 @@ class Model(ABC, BaseModel): postprocessor: PostProcessor = 
Field(..., repr=False, exclude=True)

     def preprocess(self, *args, **kwargs) -> Tuple[Sequence[npt.ArrayLike], Sequence[Context]]:
-        """preprocess input tensors
-        """
+        """preprocess input tensors"""
         return self.preprocessor(*args, **kwargs)

     def postprocess(self, *args, **kwargs):
-        """postprocess output tensors
-        """
+        """postprocess output tensors"""
         return self.postprocessor(*args, **kwargs)

     @computed_field(repr=False)
@@ -156,8 +154,7 @@ def model_source(self, num_pe: Literal[1, 2] = 2) -> bytes:
         return resolve_model_source(self._artifact_name, num_pe=num_pe)

     def resolve_all(self):
-        """resolve all non-cached properties(origin, tensor_name_to_range, model_sources)
-        """
+        """resolve all non-cached properties(origin, tensor_name_to_range, model_sources)"""
         _ = self.origin, self.tensor_name_to_range
         for num_pe in (1, 2):
             _ = self.model_source(num_pe=num_pe)

diff --git a/tekton/furiosa-models-ci/test.yaml b/tekton/furiosa-models-ci/test.yaml
index 9bddce20..2fdca857 100644
--- a/tekton/furiosa-models-ci/test.yaml
+++ b/tekton/furiosa-models-ci/test.yaml
@@ -25,7 +25,7 @@ spec:
     - name: image
       type: image
   steps:
-  - name: unittests
+  - name: unittests-and-document-notebook-test
     env:
     - name: "PIP_EXTRA_INDEX_URL"
       value: "https://internal-pypi.furiosa.dev/simple"
@@ -38,6 +38,7 @@ spec:
        # FIXME: Remove me when TLS problem is solved (https://github.com/furiosa-ai/furiosa-sdk-private/issues/719)
        export LD_PRELOAD=$(find $(python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')/torch/lib/ -name "libgomp*"):$LD_PRELOAD
        make unit_tests
+       make notebook_tests

   resources:
     requests:

From ccdf31afd444f05b70243d2a461818bc0609c625 Mon Sep 17 00:00:00 2001
From: Myeong-geun Shin
Date: Mon, 28 Aug 2023 13:20:15 +0900
Subject: [PATCH 08/18] docs: update changelog & docs

---
 docs/changelog.md                             |  19 ++
 .../{command_tool.md => command_line_tool.md} |   2 +-
 docs/getting_started.md                       |  15 +-
 docs/model_object.md                          |   6 +-
 .../collect_calibration_ranges.ipynb          |  21 ++
 .../navigate_models.ipynb}                    |  80 ++++-
 .../quantize_and_compile_model.ipynb          | 277 ++++++++++++++++++
 .../serving_with_furiosa_serving.ipynb        |  21 ++
 furiosa/models/types.py                       |  11 +-
 mkdocs.yml                                    |   8 +-
 10 files changed, 443 insertions(+), 17 deletions(-)
 rename docs/{command_tool.md => command_line_tool.md} (99%)
 create mode 100644 docs/tutorials/collect_calibration_ranges.ipynb
 rename docs/{user_guide.ipynb => tutorials/navigate_models.ipynb} (52%)
 create mode 100644 docs/tutorials/quantize_and_compile_model.ipynb
 create mode 100644 docs/tutorials/serving_with_furiosa_serving.ipynb

diff --git a/docs/changelog.md b/docs/changelog.md
index 736bf256..df4b4697 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,24 @@
 # Changelog

+## [0.10.0 - 2023-08-28]
+
+### New Features
+- Provide 1pe artifacts too #158
+
+### Improvements
+- Resolve artifact binaries lazily #155
+- Added `ruff` linter #160
+- Automatically build documentation on comment #162
+- Upgrade `pydantic` library version to `2.0.0` #166
+- Added `model.resolve_all()` to resolve all lazily loaded fields at once #166
+- Added local artifact binary cache #166
+
+### Removed
+- Removed unused `timm` dependency #149
+- **Breaking:** Now uses default Python initializer instead of `model.load()` #166
+- **Breaking:** `model.enf` field has been removed #166
+- **Breaking:** `model.source`, `model.calib_yaml` fields have been renamed #166
+
 ## [0.9.1 - 2023-05-26]

 ## [0.9.0 - 2023-05-12]

diff --git a/docs/command_tool.md b/docs/command_line_tool.md
similarity index 99%
rename from
docs/command_tool.md
rename to docs/command_line_tool.md
index ae967da1..03d8f0da 100644
--- a/docs/command_tool.md
+++ b/docs/command_line_tool.md
@@ -1,4 +1,4 @@
-# Command Tool
+# Command Line Tool

 We provide a simple command line tool called `furiosa-models` to allow users to
 evaluate or run quickly one of models with FuriosaAI NPU.

diff --git a/docs/getting_started.md b/docs/getting_started.md
index 35b49003..20320fb5 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -23,6 +23,12 @@ pip install --upgrade pip setuptools wheel
 pip install 'furiosa-models'
 ```

+!!!Info
+    Older versions of wheel may reject the native-build wheels of furiosa-models.
+    Please make sure to install and upgrade the Python packaging tools before
+    installing furiosa-models.
+
+
 ??? "Building from Source Code (click to see)"
     Or you can build from the source code as following:

@@ -49,7 +55,7 @@ You can simply load a model and run through furiosa-sdk as the following:
 This example does:

 1. Load the [SSDMobileNet](models/ssd_mobilenet.md) model
-2. Create a `session`, which is the main class of Furiosa Runtime, that can load an ONNX/tflite model onto NPU and run inferences.
+2. Create a `Runner`, which is one of the main classes of Furiosa Runtime, that can load an ONNX/tflite model onto an NPU and run inferences.
 3. Run an inference with pre/post process functions.

 A `Model` instance is a Python object, including model artifacts, metadata, and its pre/postprocessors.
@@ -60,5 +66,8 @@ Also, you can find all available models at
 Each model page includes the details of the model, input and output tensors, and pre/post processings, and API reference.

-If you want to learn more about `furiosa.runtime.session` in Furiosa Runtime, please refer to
-[Furiosa SDK - Tutorial and Code Examples](https://furiosa-ai.github.io/docs/latest/en/software/tutorials.html).
+If you want to learn more about `Runner` in Furiosa Runtime, please refer to the links below.
+
+- [Furiosa SDK - furiosa.runtime API Reference](https://furiosa-ai.github.io/docs/latest/en/api/python/furiosa.runtime.html)
+- [Furiosa SDK - furiosa.runtime.sync.create_runner Reference](https://furiosa-ai.github.io/docs/latest/en/api/python/furiosa.runtime.html#furiosa.runtime.sync.Runtime)
+- [Furiosa SDK - Tutorial and Code Examples](https://furiosa-ai.github.io/docs/latest/en/software/tutorials.html)

diff --git a/docs/model_object.md b/docs/model_object.md
index 7ca96f7f..073f93dc 100644
--- a/docs/model_object.md
+++ b/docs/model_object.md
@@ -23,11 +23,11 @@ ENF format is [FuriosaAI Compiler](https://furiosa-ai.github.io/docs/latest/en/s
 Once you have the ENF file, you can reuse it to omit the compilation process that take up to minutes.

 In addition, a `Model` object has various metadata. The followings are all attributes belonging to a single `Model` object.
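To make the attribute list being edited here concrete, the sketch below pokes at those fields. It assumes the 0.10-era API used throughout this series; each lazily loaded access triggers the fetch-and-cache behavior described above.

```python
from furiosa.models import vision

model = vision.ResNet50()

# Static metadata fields.
print(model.name, model.family, model.version)

# Lazily resolved artifacts: fetched over the network on first access, then cached.
onnx_model: bytes = model.origin           # source model in ONNX
ranges: dict = model.tensor_name_to_range  # per-tensor calibration ranges
enf: bytes = model.model_source(num_pe=2)  # pre-compiled ENF binary for 2 PEs
```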
-### `furiosa.models.types.Model`
 ::: furiosa.models.types.Model
     options:
-      show_source: true
-      show_symbol_type_toc: true
+      show_root_heading: true
+      heading_level: 4
+      show_source: false

 ## Inferencing with Session API

diff --git a/docs/tutorials/collect_calibration_ranges.ipynb b/docs/tutorials/collect_calibration_ranges.ipynb
new file mode 100644
index 00000000..dedbf1a0
--- /dev/null
+++ b/docs/tutorials/collect_calibration_ranges.ipynb
@@ -0,0 +1,21 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Collect calibration ranges\n",
+    "\n",
+    "TBU"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

diff --git a/docs/user_guide.ipynb b/docs/tutorials/navigate_models.ipynb
similarity index 52%
rename from docs/user_guide.ipynb
rename to docs/tutorials/navigate_models.ipynb
index 08ea8752..dcf081a2 100644
--- a/docs/user_guide.ipynb
+++ b/docs/tutorials/navigate_models.ipynb
@@ -4,9 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Furiosa Model Zoo User Guide\n",
-    "Model Zoo is a project that gathers models that can be accelerated with the FuriosaAI NPU so that they are easy to use.\n",
-    "First, let's take a look at the available models."
+    "Model Zoo is a project that gathers models that can be accelerated with the FuriosaAI NPU so that they are easy to use. First, let's take a look at the available models."
    ]
   },
   {
@@ -53,7 +51,81 @@
    }
   ],
   "source": [
-   "!furiosa-models list"
+   "# Or you can use the command line tool\n",
+   "! furiosa-models list"
   ]
+ },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "Let's create an instance of one of them, ResNet50, and take a closer look at the `Model` object."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 3,
+  "metadata": {},
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "name='ResNet50' task_type= format= family='ResNet' version='v1.5' metadata=Metadata(description='ResNet50 v1.5 int8 ImageNet-1K', publication=Publication(authors=None, title=None, publisher=None, date=None, url='https://arxiv.org/abs/1512.03385.pdf')) tags=None\n",
+     "Static fields: ['name', 'task_type', 'format', 'family', 'version', 'metadata', 'tags', 'preprocessor', 'postprocessor']\n",
+     "Lazy loaded fields: ['origin', 'tensor_name_to_range']\n"
+    ]
+   }
+  ],
+  "source": [
+   "model = vision.ResNet50()\n",
+   "print(model)\n",
+   "\n",
+   "print(\"Static fields:\", list(model.model_fields.keys()))\n",
+   "print(\"Lazy loaded fields:\", list(model.model_computed_fields.keys()))"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 4,
+  "metadata": {},
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "name: ResNet50\n",
+     "format: ONNX\n",
+     "family: ResNet\n",
+     "version: v1.5\n",
+     "metadata:\n",
+     "  description: ResNet50 v1.5 int8 ImageNet-1K\n",
+     "  publication:\n",
+     "    url: https://arxiv.org/abs/1512.03385.pdf\n",
+     "task type: Image Classification\n",
+     "available postprocess versions: Python\n"
+    ]
+   }
+  ],
+  "source": [
+   "# You can see the static fields in the command line tool too\n",
+   "! furiosa-models desc ResNet50"
+  ]
+ },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "Actually, there's one more hidden field (or rather, a method)."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 5,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "_ = model.model_source(num_pe=2)"
+  ]
+ }
  ],
  "metadata": {
   "kernelspec": {
    "display_name": "models",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
     "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.9.17"
   },
   "orig_nbformat": 4
  },
  "nbformat": 4,
  "nbformat_minor": 2
 }

diff --git a/docs/tutorials/quantize_and_compile_model.ipynb b/docs/tutorials/quantize_and_compile_model.ipynb
new file mode 100644
index 00000000..fc9687d5
--- /dev/null
+++ b/docs/tutorials/quantize_and_compile_model.ipynb
@@ -0,0 +1,277 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Quantizing and Compiling Models\n",
+    "\n",
+    "Related fields:\n",
+    "- tensor_name_to_range\n",
+    "- origin\n",
+    "\n",
+    "See the Model's input/output summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "libfuriosa_hal.so --- v0.11.0, built @ 43c901f\n",
+      "libfuriosa_hal.so --- v0.11.0, built @ 43c901f\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "First field of calibration ranges: ('input_tensor:0', (-123.5584560111165, 150.34208860248327))\n",
+      "\u001b[2m2023-08-28T01:28:05.078547Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...\n",
+      "\u001b[2m2023-08-28T01:28:05.084541Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n",
+      "\u001b[2m2023-08-28T01:28:05.084554Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n",
+      "\u001b[2m2023-08-28T01:28:05.084557Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] detected 1 NPU device(s):\n",
+      "\u001b[2m2023-08-28T01:28:05.094238Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:2:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.0, f7b0f28)\n",
+      "\u001b[2m2023-08-28T01:28:05.094462Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] started\n",
+      "\u001b[2m2023-08-28T01:28:09.761370Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20230828102809-v88fdx.log\n",
+      "\u001b[2m2023-08-28T01:28:09.761655Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] created Sess-08a8e56d using npu:2:0-1\n",
+      "\u001b[2m2023-08-28T01:28:09.773352Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-08a8e56d] compiling the model (target: warboy-b0-2pe, 128dpes, size: 102.2 MB)\n",
+      "\u001b[2m2023-08-28T01:28:12.780705Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-08a8e56d] the model compile is successful (took 3 secs)\n",
+      "\u001b[2m2023-08-28T01:28:13.227285Z\u001b[0m \u001b[32m INFO\u001b[0m
\u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] created 1 NPU threads on npu:2:0-1 (DRAM: 180.0 kiB/16.0 GiB, SRAM: 31.4 MiB/128.0 MiB)\n", + "Inputs:\n", + "{0: TensorDesc(shape=(1, 3, 224, 224), dtype=FLOAT32, format=NCHW, size=602112, len=150528)}\n", + "Outputs:\n", + "{0: TensorDesc(shape=(1,), dtype=INT64, format=?, size=8, len=1)}\n", + "\u001b[2m2023-08-28T01:28:13.413781Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-08a8e56d] terminated\n", + "\u001b[2m2023-08-28T01:28:13.417749Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:2:0-1) has been closed\n", + "\u001b[2m2023-08-28T01:28:13.419580Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] stopped\n" + ] + } + ], + "source": [ + "from furiosa.models import vision\n", + "from furiosa.quantizer import quantize\n", + "from furiosa.runtime.sync import create_runner\n", + "\n", + "import onnx\n", + "\n", + "model = vision.ResNet50()\n", + "print(\"First field of calibration ranges:\", next(iter(model.tensor_name_to_range.items())))\n", + "\n", + "f32_onnx_model = onnx.load_from_string(model.origin)\n", + "quantized_onnx = quantize(f32_onnx_model, model.tensor_name_to_range)\n", + "\n", + "with create_runner(quantized_onnx) as runner:\n", + " runner.model.print_summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run inferences with scaling" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m2023-08-28T01:28:13.806497Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...\n", + "\u001b[2m2023-08-28T01:28:13.811804Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n", + "\u001b[2m2023-08-28T01:28:13.811809Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n", + "\u001b[2m2023-08-28T01:28:13.811811Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] detected 1 NPU device(s):\n", + "\u001b[2m2023-08-28T01:28:13.823402Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:2:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.0, f7b0f28)\n", + "\u001b[2m2023-08-28T01:28:13.823534Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] started\n", + "\u001b[2m2023-08-28T01:28:17.728906Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20230828102817-okpycl.log\n", + "\u001b[2m2023-08-28T01:28:17.729397Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] created Sess-541713a4 using npu:2:0-1\n", + 
"\u001b[2m2023-08-28T01:28:17.743437Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-541713a4] compiling the model (target: warboy-b0-2pe, 128dpes, size: 102.2 MB)\n", + "\u001b[2m2023-08-28T01:28:20.754124Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-541713a4] the model compile is successful (took 3 secs)\n", + "\u001b[2m2023-08-28T01:28:21.203223Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] created 1 NPU threads on npu:2:0-1 (DRAM: 180.0 kiB/16.0 GiB, SRAM: 31.4 MiB/128.0 MiB)\n", + "Inputs:\n", + "{0: TensorDesc(shape=(1, 3, 224, 224), dtype=FLOAT32, format=NCHW, size=602112, len=150528)}\n", + "Outputs:\n", + "{0: TensorDesc(shape=(1,), dtype=INT64, format=?, size=8, len=1)}\n", + "Average inference time: 5.614574640989304 ms\n", + "\u001b[2m2023-08-28T01:28:27.071158Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-541713a4] terminated\n", + "\u001b[2m2023-08-28T01:28:27.080271Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:2:0-1) has been closed\n", + "\u001b[2m2023-08-28T01:28:27.085561Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] stopped\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from time import perf_counter\n", + "\n", + "\n", + "quantized_onnx = quantize(f32_onnx_model, model.tensor_name_to_range)\n", + "\n", + "with create_runner(quantized_onnx) as runner:\n", + " input_tensor_desc = runner.model.inputs()\n", + " runner.model.print_summary()\n", + " fake_input = [\n", + " np.asarray(np.random.randint(256, size=desc.shape), dtype=desc.dtype.numpy)\n", + " for desc in input_tensor_desc\n", + " ]\n", + " starting_time = perf_counter()\n", + " for _ in range(1000):\n", + " runner.run(fake_input)\n", + " print(\"Average inference time:\", perf_counter() - starting_time, \"ms\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run inferences without scaling (and quantize).\n", + "\n", + "\n", + "See [performance tuning guide](https://furiosa-ai.github.io/docs/latest/ko/software/performance.html#quantize) for more details.\n", + "\n", + "\n", + "Please note that input data type has been changed" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m2023-08-28T01:28:27.838330Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...\n", + "\u001b[2m2023-08-28T01:28:27.844387Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n", + "\u001b[2m2023-08-28T01:28:27.844399Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n", + "\u001b[2m2023-08-28T01:28:27.844403Z\u001b[0m \u001b[32m INFO\u001b[0m 
\u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] detected 1 NPU device(s):\n", + "\u001b[2m2023-08-28T01:28:27.854235Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:2:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.0, f7b0f28)\n", + "\u001b[2m2023-08-28T01:28:27.854453Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] started\n", + "\u001b[2m2023-08-28T01:28:32.712311Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20230828102832-nwtxhl.log\n", + "\u001b[2m2023-08-28T01:28:32.712548Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] created Sess-78e2493d using npu:2:0-1\n", + "\u001b[2m2023-08-28T01:28:32.733493Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-78e2493d] compiling the model (target: warboy-b0-2pe, 128dpes, size: 102.2 MB)\n", + "\u001b[2m2023-08-28T01:28:35.824911Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-78e2493d] the model compile is successful (took 3 secs)\n", + "\u001b[2m2023-08-28T01:28:36.227750Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] created 1 NPU threads on npu:2:0-1 (DRAM: 180.0 kiB/16.0 GiB, SRAM: 31.4 MiB/128.0 MiB)\n", + "Inputs:\n", + "{0: TensorDesc(shape=(1, 3, 224, 224), dtype=UINT8, format=NCHW, size=150528, len=150528)}\n", + "Outputs:\n", + "{0: TensorDesc(shape=(1,), dtype=INT64, format=?, size=8, len=1)}\n", + "Average inference time: 2.5746346139349043 ms\n", + "\u001b[2m2023-08-28T01:28:39.026687Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-78e2493d] terminated\n", + "\u001b[2m2023-08-28T01:28:39.036203Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:2:0-1) has been closed\n", + "\u001b[2m2023-08-28T01:28:39.041580Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] stopped\n" + ] + } + ], + "source": [ + "from copy import deepcopy\n", + "from furiosa.quantizer import ModelEditor, get_pure_input_names, TensorType\n", + "\n", + "\n", + "model_wo_input_quantize = deepcopy(f32_onnx_model)\n", + "editor = ModelEditor(model_wo_input_quantize)\n", + "for input_name in get_pure_input_names(model_wo_input_quantize):\n", + " editor.convert_input_type(input_name, TensorType.UINT8)\n", + "quantized_onnx_wo_input_quantize = quantize(model_wo_input_quantize, model.tensor_name_to_range)\n", + "\n", + "with create_runner(quantized_onnx_wo_input_quantize) as runner:\n", + " input_tensor_desc = runner.model.inputs()\n", + " runner.model.print_summary()\n", + " fake_input = [\n", + " np.random.randint(256, size=desc.shape, dtype=desc.dtype.numpy)\n", + " for desc in input_tensor_desc\n", + " ]\n", + " starting_time = perf_counter()\n", + " for _ in range(1000):\n", + " runner.run(fake_input)\n", + " print(\"Average inference time:\", perf_counter() - starting_time, \"ms\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "You can compile & use only 1pe" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m2023-08-28T01:28:39.079503Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...\n", + "\u001b[2m2023-08-28T01:28:39.084505Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n", + "\u001b[2m2023-08-28T01:28:39.084523Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n", + "\u001b[2m2023-08-28T01:28:39.084529Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] detected 1 NPU device(s):\n", + "\u001b[2m2023-08-28T01:28:39.094433Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:2:0 (warboy-b0, 64dpes, firmware: 1.7.0, f7b0f28)\n", + "\u001b[2m2023-08-28T01:28:39.094599Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] started\n", + "\u001b[2m2023-08-28T01:28:44.681264Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20230828102844-v2bdzh.log\n", + "\u001b[2m2023-08-28T01:28:44.681540Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] created Sess-660528cf using npu:2:0\n", + "\u001b[2m2023-08-28T01:28:44.693511Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-660528cf] compiling the model (target: warboy-b0, 64dpes, size: 102.2 MB)\n", + "\u001b[2m2023-08-28T01:28:47.133498Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-660528cf] the model compile is successful (took 2 secs)\n", + "\u001b[2m2023-08-28T01:28:47.336202Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] created 1 NPU threads on npu:2:0 (DRAM: 14.0 MiB/16.0 GiB, SRAM: 16.0 MiB/64.0 MiB)\n", + "Inputs:\n", + "{0: TensorDesc(shape=(1, 3, 224, 224), dtype=UINT8, format=NCHW, size=150528, len=150528)}\n", + "Outputs:\n", + "{0: TensorDesc(shape=(1,), dtype=INT64, format=?, size=8, len=1)}\n", + "Average inference time: 2.743666300084442 ms\n", + "\u001b[2m2023-08-28T01:28:50.269460Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-660528cf] terminated\n", + "\u001b[2m2023-08-28T01:28:50.279019Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:2:0) has been closed\n", + "\u001b[2m2023-08-28T01:28:50.284263Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] stopped\n" + ] + } + ], + "source": [ + "from furiosa.runtime.sync import Runtime\n", + 
"\n", + "\n", + "with Runtime(device=\"warboy(1)*1\") as runtime:\n", + " with runtime.create_runner(quantized_onnx_wo_input_quantize) as runner:\n", + " input_tensor_desc = runner.model.inputs()\n", + " runner.model.print_summary()\n", + " fake_input = [\n", + " np.random.randint(256, size=desc.shape, dtype=desc.dtype.numpy)\n", + " for desc in input_tensor_desc\n", + " ]\n", + " starting_time = perf_counter()\n", + " for _ in range(1000):\n", + " runner.run(fake_input)\n", + " print(\"Average inference time:\", perf_counter() - starting_time, \"ms\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "models", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/tutorials/serving_with_furiosa_serving.ipynb b/docs/tutorials/serving_with_furiosa_serving.ipynb new file mode 100644 index 00000000..01b4f934 --- /dev/null +++ b/docs/tutorials/serving_with_furiosa_serving.ipynb @@ -0,0 +1,21 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Serving with furiosa-serving\n", + "\n", + "TBU" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/furiosa/models/types.py b/furiosa/models/types.py index 2e972a95..8d0d134c 100644 --- a/furiosa/models/types.py +++ b/furiosa/models/types.py @@ -121,11 +121,14 @@ class Model(ABC, BaseModel): postprocessor: PostProcessor = Field(..., repr=False, exclude=True) def preprocess(self, *args, **kwargs) -> Tuple[Sequence[npt.ArrayLike], Sequence[Context]]: - """preprocess input tensors""" + """Preprocess input tensors. Input of this function varies from model to model + + Returns: + A tuple that contains list of preprocessed input tensors and contexts""" return self.preprocessor(*args, **kwargs) def postprocess(self, *args, **kwargs): - """postprocess output tensors""" + """Postprocess output tensors""" return self.postprocessor(*args, **kwargs) @computed_field(repr=False) @@ -140,7 +143,7 @@ def tensor_name_to_range(self) -> Dict[str, List[float]]: return yaml.full_load(calib_yaml) def model_source(self, num_pe: Literal[1, 2] = 2) -> bytes: - """the executable binary for furiosa runtime and NPU. It can be + """Returns an executable binary for furiosa runtime and NPU. It can be directly fed to `furiosa.runtime.create_runner`. 
If model binary is not compiled yet, it will be quantized & compiled automatically if possible @@ -154,7 +157,7 @@ def model_source(self, num_pe: Literal[1, 2] = 2) -> bytes: return resolve_model_source(self._artifact_name, num_pe=num_pe) def resolve_all(self): - """resolve all non-cached properties(origin, tensor_name_to_range, model_sources)""" + """Resolve all non-cached properties(origin, tensor_name_to_range, model_sources)""" _ = self.origin, self.tensor_name_to_range for num_pe in (1, 2): _ = self.model_source(num_pe=num_pe) diff --git a/mkdocs.yml b/mkdocs.yml index 3d5dee06..2fce9431 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,9 +20,13 @@ repo_url: https://github.com/furiosa-ai/furiosa-models nav: - Overview: index.md - getting_started.md -- user_guide.ipynb +- Tutorials: + - tutorials/navigate_models.ipynb + - tutorials/collect_calibration_ranges.ipynb + - tutorials/quantize_and_compile_model.ipynb + - tutorials/serving_with_furiosa_serving.ipynb - model_object.md -- command_tool.md +- command_line_tool.md - Models: - models/resnet50_v1.5.md - models/efficientnet_b0.md From b0c007400bfe3d9b7d82d07676e89852d80c48a7 Mon Sep 17 00:00:00 2001 From: Myeong-geun Shin Date: Mon, 28 Aug 2023 14:18:17 +0900 Subject: [PATCH 09/18] ci: loosen nbmake timeout --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cb6e9b7c..f46c6f74 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ unit_tests: pytest ./tests/unit/ -s notebook_tests: - pytest --nbmake ./docs + pytest --nbmake --nbmake-timeout=3600 ./docs examples: for f in $$(find docs/examples/ -name *.py); do printf "\n[TEST] $$f ...\n"; python3 $$f || exit 1; done From 66470fac6a2c41e40268932bac1461de3a49da56 Mon Sep 17 00:00:00 2001 From: Myeong-geun Shin Date: Wed, 30 Aug 2023 19:26:56 +0900 Subject: [PATCH 10/18] docs: add serving example --- Makefile | 2 +- docs/examples/serving.py | 45 +++++++++++++++++++ .../serving_with_furiosa_serving.ipynb | 21 --------- .../tutorials/serving_with_furiosa_serving.md | 5 +++ mkdocs.yml | 2 +- 5 files changed, 52 insertions(+), 23 deletions(-) create mode 100644 docs/examples/serving.py delete mode 100644 docs/tutorials/serving_with_furiosa_serving.ipynb create mode 100644 docs/tutorials/serving_with_furiosa_serving.md diff --git a/Makefile b/Makefile index f46c6f74..68b19667 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ notebook_tests: pytest --nbmake --nbmake-timeout=3600 ./docs examples: - for f in $$(find docs/examples/ -name *.py); do printf "\n[TEST] $$f ...\n"; python3 $$f || exit 1; done + for f in $$(find docs/examples/ -name *.py | grep -v "serving"); do printf "\n[TEST] $$f ...\n"; python3 $$f || exit 1; done regression-test-all: pytest ./tests/bench/ diff --git a/docs/examples/serving.py b/docs/examples/serving.py new file mode 100644 index 00000000..ee4d02f3 --- /dev/null +++ b/docs/examples/serving.py @@ -0,0 +1,45 @@ +from tempfile import NamedTemporaryFile +from typing import Dict, List + +from fastapi import FastAPI, File, UploadFile +import numpy as np +import uvicorn + +from furiosa.common.thread import synchronous +from furiosa.models import vision +from furiosa.serving import ServeAPI, ServeModel + +serve = ServeAPI() +app: FastAPI = serve.app + +resnet50 = vision.ResNet50() +# ServeModel does not support in-memory model binary for now, +# so we write model into temp file and pass its path +model_file = NamedTemporaryFile() +model_file.write(resnet50.model_source()) +model_file_path = model_file.name + +model: 
ServeModel = synchronous(serve.model("furiosart"))('ResNet50', location=model_file_path)
+
+
+@model.post("/infer")
+async def infer(image: UploadFile = File(...)) -> Dict[str, str]:
+    # Model Zoo's preprocess functions do not support in-memory image files for now
+    # (note that an in-memory image file differs from an in-memory tensor),
+    # so we write the uploaded image into a temp file and pass its path
+    image_file = NamedTemporaryFile()
+    image_file.write(await image.read())
+    image_file.flush()  # ensure the image is fully written before it is read back
+
+    tensors, _ctx = resnet50.preprocess(image_file.name)
+
+    # Infer from ServeModel
+    result: List[np.ndarray] = await model.predict(tensors)
+
+    response: str = resnet50.postprocess(result)
+
+    return {"result": response}
+
+
+if __name__ == "__main__":
+    # Run the server if current Python script is called directly
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/docs/tutorials/serving_with_furiosa_serving.ipynb b/docs/tutorials/serving_with_furiosa_serving.ipynb
deleted file mode 100644
index 01b4f934..00000000
--- a/docs/tutorials/serving_with_furiosa_serving.ipynb
+++ /dev/null
@@ -1,21 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Serving with furiosa-serving\n",
-    "\n",
-    "TBU"
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/docs/tutorials/serving_with_furiosa_serving.md b/docs/tutorials/serving_with_furiosa_serving.md
new file mode 100644
index 00000000..93cd01fe
--- /dev/null
+++ b/docs/tutorials/serving_with_furiosa_serving.md
@@ -0,0 +1,5 @@
+# Serving with furiosa-serving
+
+```python
+--8<-- "docs/examples/serving.py"
+```
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index 2fce9431..7b7970ed 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -24,7 +24,7 @@ nav:
   - tutorials/navigate_models.ipynb
   - tutorials/collect_calibration_ranges.ipynb
   - tutorials/quantize_and_compile_model.ipynb
-  - tutorials/serving_with_furiosa_serving.ipynb
+  - tutorials/serving_with_furiosa_serving.md
 - model_object.md
 - command_line_tool.md
 - Models:

From 5458dac0099c85ebd53958344d522affd545435c Mon Sep 17 00:00:00 2001
From: Myeong-geun Shin
Date: Mon, 25 Sep 2023 15:19:51 +0900
Subject: [PATCH 11/18] docs: trim ws/newline in ver when releasing docs

---
 .github/workflows/docs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 28f5fb20..47abfde7 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -82,7 +82,7 @@ jobs:
           cd ./furiosa-models
           export GIT_HASH=$(git rev-parse --short HEAD)
           export ISSUE_URL=$(echo $ISSUE | cut -d'/' -f-5)
-          export DOCS_PATH=$(echo $BODY | head -n 1 | cut -d'/' -f3)
+          export DOCS_PATH=$(echo $BODY | head -n 1 | cut -d'/' -f3 | xargs)  # use xargs to trim ws/newline chars
          echo "Git short hash: $GIT_HASH"
           echo "Issue URL base: $ISSUE_URL"
           if [ ! $(echo "$DOCS_PATH" | grep -P "v([0-9]+\.){2}[0-9]+") ]; then

From 41369c1c616e3db8f65c35f8c277b7f3f64af973 Mon Sep 17 00:00:00 2001
From: Myeong-geun Shin
Date: Mon, 25 Sep 2023 15:28:46 +0900
Subject: [PATCH 12/18] ci: bump version up to 0.10.0

---
 README.md | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index daeae175..44e2c49a 100644
--- a/README.md
+++ b/README.md
@@ -9,20 +9,21 @@ for FuriosaAI NPU. However, all models are standard ONNX or tflite models, and
 they can run even on CPU and GPU as well. 
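+
+As a minimal sketch of that portability (an editorial illustration, not part of the upstream README; it assumes the `onnxruntime` package is installed), a model's original ONNX graph can be run on CPU:
+
+```python
+import numpy as np
+import onnxruntime as ort
+
+from furiosa.models import vision
+
+model = vision.ResNet50()
+# `model.origin` holds the serialized f32 ONNX model, so any ONNX runtime can load it.
+session = ort.InferenceSession(model.origin, providers=["CPUExecutionProvider"])
+input_name = session.get_inputs()[0].name
+dummy_input = np.random.rand(1, 3, 224, 224).astype(np.float32)
+outputs = session.run(None, {input_name: dummy_input})
+print([output.shape for output in outputs])
+```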
## Releases +* [v0.10.0](https://furiosa-ai.github.io/furiosa-models/v0.10.0/changelog/) - 2023-08-28 * [v0.9.1](https://furiosa-ai.github.io/furiosa-models/v0.9.1/changelog/) - 2023-05-26 * [v0.9.0](https://furiosa-ai.github.io/furiosa-models/v0.9.0/changelog/) - 2023-05-12 * [v0.8.0](https://furiosa-ai.github.io/furiosa-models/v0.8.0/changelog/) - 2022-11-10 ## Online Documentation -If you are new, you can start from [Getting Started](https://furiosa-ai.github.io/furiosa-models/latest/getting_started/). +If you are new, you can start from [Getting Started](https://furiosa-ai.github.io/furiosa-models/v0.10.0/getting_started/). You can also find the latest online documents, including programming guides, API references, and examples from the followings: * [Furiosa Models - Latest Documentation](https://furiosa-ai.github.io/furiosa-models/latest/) -* [Model object](https://furiosa-ai.github.io/furiosa-models/latest/model_object/) -* [Model List](https://furiosa-ai.github.io/furiosa-models/latest/#model_list) -* [Command Tool](https://furiosa-ai.github.io/furiosa-models/latest/command_tool/) -* [Furiosa SDK - Tutorial and Code Examples](https://furiosa-ai.github.io/docs/latest/en/software/tutorials.html) +* [Model object](https://furiosa-ai.github.io/furiosa-models/v0.10.0/model_object/) +* [Model List](https://furiosa-ai.github.io/furiosa-models/v0.10.0/#model_list) +* [Command Tool](https://furiosa-ai.github.io/furiosa-models/v0.10.0/command_line_tool/) +* [Furiosa SDK - Tutorial and Code Examples](https://furiosa-ai.github.io/docs/v0.10.0/en/software/tutorials.html) ## Model List @@ -31,13 +32,13 @@ you can find details about loading a model, their input and output tensors, pre/ | Model | Task | Size | Accuracy | | ------------------------------------------------------------------------------------------------ | -------------------- | ---- | ------------------------- | -| [ResNet50](https://furiosa-ai.github.io/furiosa-models/latest/models/resnet50_v1.5/) | Image Classification | 25M | 75.618% (ImageNet1K-val) | -| [EfficientNetB0](https://furiosa-ai.github.io/furiosa-models/latest/models/efficientnet_b0/) | Image Classification | 6.4M | 72.44% (ImageNet1K-val) | -| [EfficientNetV2-S](https://furiosa-ai.github.io/furiosa-models/latest/models/efficientnet_v2_s/) | Image Classification | 26M | 83.532% (ImageNet1K-val) | -| [SSDMobileNet](https://furiosa-ai.github.io/furiosa-models/latest/models/ssd_mobilenet/) | Object Detection | 7.2M | mAP 0.232 (COCO 2017-val) | -| [SSDResNet34](https://furiosa-ai.github.io/furiosa-models/latest/models/ssd_resnet34/) | Object Detection | 20M | mAP 0.220 (COCO 2017-val) | -| [YOLOv5M](https://furiosa-ai.github.io/furiosa-models/latest/models/yolov5m/) | Object Detection | 21M | mAP 0.272 (Bdd100k-val)\* | -| [YOLOv5L](https://furiosa-ai.github.io/furiosa-models/latest/models/yolov5l/) | Object Detection | 46M | mAP 0.284 (Bdd100k-val)\* | +| [ResNet50](https://furiosa-ai.github.io/furiosa-models/v0.10.0/models/resnet50_v1.5/) | Image Classification | 25M | 75.618% (ImageNet1K-val) | +| [EfficientNetB0](https://furiosa-ai.github.io/furiosa-models/v0.10.0/models/efficientnet_b0/) | Image Classification | 6.4M | 72.44% (ImageNet1K-val) | +| [EfficientNetV2-S](https://furiosa-ai.github.io/furiosa-models/v0.10.0/models/efficientnet_v2_s/) | Image Classification | 26M | 83.532% (ImageNet1K-val) | +| [SSDMobileNet](https://furiosa-ai.github.io/furiosa-models/v0.10.0/models/ssd_mobilenet/) | Object Detection | 7.2M | mAP 0.232 (COCO 2017-val) | +| 
[SSDResNet34](https://furiosa-ai.github.io/furiosa-models/v0.10.0/models/ssd_resnet34/) | Object Detection | 20M | mAP 0.220 (COCO 2017-val) | +| [YOLOv5M](https://furiosa-ai.github.io/furiosa-models/v0.10.0/models/yolov5m/) | Object Detection | 21M | mAP 0.272 (Bdd100k-val)\* | +| [YOLOv5L](https://furiosa-ai.github.io/furiosa-models/v0.10.0/models/yolov5l/) | Object Detection | 46M | mAP 0.284 (Bdd100k-val)\* | _\*: The accuracy of the yolov5 f32 model trained with bdd100k-val dataset, is mAP 0.295 (for yolov5m) and mAP 0.316 (for yolov5l)._ From add3e67f51f5d34407b2b99f5b000e765286b067 Mon Sep 17 00:00:00 2001 From: Myeong-geun Shin Date: Wed, 30 Aug 2023 13:29:44 +0900 Subject: [PATCH 13/18] nits: import early, packaging stuffs correctly --- docs/examples/ssd_mobilenet_onnx.py | 6 ++++++ furiosa/models/client/api.py | 4 ++-- pyproject.toml | 16 ++++++++++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/docs/examples/ssd_mobilenet_onnx.py b/docs/examples/ssd_mobilenet_onnx.py index b5a80294..ebb715c3 100644 --- a/docs/examples/ssd_mobilenet_onnx.py +++ b/docs/examples/ssd_mobilenet_onnx.py @@ -15,6 +15,12 @@ quantized_onnx = quantize(onnx_model, tensor_name_to_range) with create_runner(quantized_onnx, compiler_config=compiler_config) as runner: + # Models in the Model Zoo have built-in optimizations that, by default, + # bypass normalization, quantization, and type conversion. If you compile + # and utilize these models without employing these optimizations, it's + # necessary to set up preprocessing steps to incorporate normalization and + # type casting. To accomplish this, you should introduce an extra parameter, + # `with_scaling=True`. inputs, contexts = mobilenet.preprocess(image, with_scaling=True) outputs = runner.run(inputs) mobilenet.postprocess(outputs, contexts[0]) diff --git a/furiosa/models/client/api.py b/furiosa/models/client/api.py index 6236ae57..03e34db0 100644 --- a/furiosa/models/client/api.py +++ b/furiosa/models/client/api.py @@ -3,6 +3,8 @@ from tqdm import tqdm +from furiosa.runtime.sync import create_runner + from .. 
import vision from ..types import Model, PythonPostProcessor @@ -102,8 +104,6 @@ def decorate_result( def run_inferences(model_cls: Type[Model], input_paths: Sequence[str], postprocess: Optional[str]): - from furiosa.runtime.sync import create_runner - warning = """WARN: the benchmark results may depend on the number of input samples, sizes of the images, and a machine where this benchmark is running.""" if postprocess: diff --git a/pyproject.toml b/pyproject.toml index 8c78bb48..c1b2a19d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,8 +65,20 @@ Documentation = "https://furiosa-ai.github.io/furiosa-models/latest/" [tool.flit.module] name = "furiosa.models" -[tool.flit.external-data] -directory = "furiosa/models/data" +[tool.flit.sdist] +exclude = [ + '.dvc', + '.dvcignore', + '.github', + '.gitignore', + 'Makefile', + 'ci-constraints.txt', + 'docker', + 'docs', + 'mkdocs.yml', + 'tekton', + 'tests', +] [tool.pytest.ini_options] addopts = "--benchmark-autosave" From 9eb8d4ad72d84e5732e8818ef7230ab9aec6c127 Mon Sep 17 00:00:00 2001 From: Myeong-geun Shin Date: Fri, 27 Oct 2023 19:19:39 +0900 Subject: [PATCH 14/18] docs: apply changes from 0.10.0 update --- docs/command_line_tool.md | 6 +- docs/model_object.md | 53 +++++--- .../collect_calibration_ranges.ipynb | 21 --- docs/tutorials/navigate_models.ipynb | 126 ++++++++++++++---- .../tutorials/serving_with_furiosa_serving.md | 2 +- mkdocs.yml | 1 - 6 files changed, 134 insertions(+), 75 deletions(-) delete mode 100644 docs/tutorials/collect_calibration_ranges.ipynb diff --git a/docs/command_line_tool.md b/docs/command_line_tool.md index 03d8f0da..da5a8b9b 100644 --- a/docs/command_line_tool.md +++ b/docs/command_line_tool.md @@ -13,7 +13,7 @@ furiosa-models [-h] {list, desc, bench} ... `furiosa-models` command has three subcommands: `list`, `desc`, and `bench`. -## Subcommand: list +## Subcommand: `list` `list` subcommand prints out the list of models with attributes. You will be able to figure out what models are available. @@ -35,7 +35,7 @@ $ furiosa-models list +-----------------+------------------------------+----------------------+-------------------------+ ``` -## Subcommand: bench +## Subcommand: `bench` `bench` subcommand runs a specific model with a given path where the input sample data are located. It will print out the performance benchmark results like QPS. @@ -72,7 +72,7 @@ QPS: 790.88645 Avg. elapsed time / sample: 1.26440 ms ``` -## Subcommand: desc +## Subcommand: `desc` `desc` subcommand shows the details of a specific model. diff --git a/docs/model_object.md b/docs/model_object.md index 073f93dc..d32bc98f 100644 --- a/docs/model_object.md +++ b/docs/model_object.md @@ -1,14 +1,16 @@ # Model object -In `furiosa-models` project, `Model` is the first class object, and it represents a neural network model. -This document explains what [`Model`][furiosa.models.types.Model] object offers and their usages. +In the `furiosa-models` project, the `Model` is the primary class object, representing a neural network model. This document elucidates the offerings and uses of the [`Model`][furiosa.models.types.Model] object. + ## Loading a pre-trained model -To load a pre-trained neural-network model, you need to call the `Model` object. -Since the sizes of pre-trained model weights vary from tens to hundreds megabytes, -the model images are not included in Python package. First time the model object is called, a pre-trained model will be -fetched over the network. 
It takes some time (usually few seconds) depending on models and network conditions. -Once the model images are fetched, they will be cached on a local disk. + +To load a pre-trained neural network model, you need to invoke the `Model` object. +As the sizes of pre-trained model weights can range from tens to hundreds of megabytes, +the model images are not included in the Python package. The first time the model object is called, +a pre-trained model will be fetched over the network. This process takes some time, +typically a few seconds, depending on the models and network conditions. +Once the model images are fetched, they will be cached on your local disk. === "Load module" ```python @@ -16,12 +18,15 @@ Once the model images are fetched, they will be cached on a local disk. ``` + ## Accessing artifacts and metadata -A `Model` object includes model artifacts, such as ONNX, tflite, mapping from a tensor name to the tensor's min and max, and ENF. -ENF format is [FuriosaAI Compiler](https://furiosa-ai.github.io/docs/latest/en/software/compiler.html) specific format. -Once you have the ENF file, you can reuse it to omit the compilation process that take up to minutes. -In addition, a `Model` object has various metadata. The followings are all attributes belonging to a single `Model` object. +A Model object encompasses model artifacts, such as ONNX, TFLite, mapping from a tensor name to the tensor's min and max, and ENF. + +The ENF format is specific to the FuriosaAI Compiler. +Once you have the ENF file, you can reuse it to skip the compilation process, which can take up to several minutes. +You can acquire the ENF binary from the model_source() method. +In addition, a Model object contains various metadata attributes. ::: furiosa.models.types.Model options: @@ -30,9 +35,10 @@ In addition, a `Model` object has various metadata. The followings are all attri show_source: false -## Inferencing with Session API +## Inferencing with Runner API + +To create a Runner, pass the ENF binary obtained from the `model_source()` method of the model object to the `furiosa.runtime.sync.create_runner` function. If you prefer an asynchronous Runner, you can use the `furiosa.runtime.create_runner` function instead. Passing the pre-compiled ENF binary allows you to perform inference directly without the compilation process. Alternatively, you can also manually quantize and compile the original f32 model with the provided calibration range. -To create a session, pass the `enf` field of the model object to the furiosa.runtime.session.create() function. Passing the pre-compiled `enf` allows you to perform inference directly without the compilation process. Alternatively, you can also manually quantize and compile the original f32 model with the provided calibration range. !!!Info If you want to learn more about the installation of furiosa-sdk and how to use it, please follow the followings: @@ -41,14 +47,17 @@ To create a session, pass the `enf` field of the model object to the furiosa.run * [Python SDK Installation and User Guide](https://furiosa-ai.github.io/docs/latest/en/software/python-sdk.html) * [Furiosa SDK - Tutorial and Code Examples](https://furiosa-ai.github.io/docs/latest/en/software/tutorials.html) -Passing `Model.origin` to `session.create()` allows users to start from source models in ONNX or tflite and customize models to their specific use-cases. This customization includes options such as specifying batch sizes and compiler configurations for optimization purposes. 
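+
+A rough sketch of such customization (an editorial illustration rather than part of the original guide; the `batch_size` value below is an assumption for demonstration) could look like this:
+
+```python
+import onnx
+
+from furiosa.models import vision
+from furiosa.quantizer import quantize
+from furiosa.runtime.sync import create_runner
+
+model = vision.ResNet50()
+# Start from the f32 ONNX source and the pre-calibrated ranges shipped with the model.
+f32_onnx_model = onnx.load_from_string(model.origin)
+quantized_onnx = quantize(f32_onnx_model, model.tensor_name_to_range)
+
+# batch_size (and a compiler_config dict) are optional keyword arguments;
+# the value here is only an example.
+with create_runner(quantized_onnx, batch_size=2) as runner:
+    runner.model.print_summary()
+```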
For additional information on Model.origin, please refer to [Accessing artifacts and metadata](#accessing_artifacts_and_metadata).
 
-To utilize f32 source models, it is necessary to perform calibration and quantization.
-Pre-calibrated data is readily available in Furiosa-models, facilitating direct access to the quantization process.
-For manual quantization of the model, you can install the `furiosa-quantizer` package, which can be found at this [package link](https://furiosa-ai.github.io/docs/latest/en/software/python-sdk.html#quantizer).
-The tensor_name_to_range field of the model class represents this pre-calibrated data.
-After quantization, the output will be in the form of FuriosaAI's IR which can then be passed to the session.
-At this stage, the compiler configuration can be specified.
+To work with f32 source models, calibration and quantization are essential steps.
+You can access pre-calibrated data directly from furiosa-models, simplifying the quantization process.
+If you prefer manual quantization of the model, you can install the `furiosa-quantizer` package, available at this [package link](https://furiosa-ai.github.io/docs/latest/en/software/python-sdk.html#quantizer).
+The `tensor_name_to_range` field within the model class contains this pre-calibrated data.
+
+Upon quantization, the output will be in FuriosaAI's Intermediate Representation (IR) format, which can then be passed to the Runner. At this stage, you have the option to specify the compiler configuration.
 
 
 
@@ -67,6 +76,7 @@ At this stage, the compiler configuration can be specified.
 
 ### Pre/Postprocessing
 
+
 There are gaps between model input/outputs and user applications' desired input and output data.
 In general, inputs and outputs of a neural network model are tensors. 
In applications, user sample data are images in standard formats like PNG or JPEG, and @@ -105,7 +115,6 @@ In sum, typical steps of a single inference is as the following, as also shown a --8<-- "docs/examples/ssd_mobilenet_native.py" ``` +## See Also - -# See Also * [Furiosa SDK Documentation](https://furiosa-ai.github.io/docs/latest/en/) diff --git a/docs/tutorials/collect_calibration_ranges.ipynb b/docs/tutorials/collect_calibration_ranges.ipynb deleted file mode 100644 index dedbf1a0..00000000 --- a/docs/tutorials/collect_calibration_ranges.ipynb +++ /dev/null @@ -1,21 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Collect calibration ranges\n", - "\n", - "TBU" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/tutorials/navigate_models.ipynb b/docs/tutorials/navigate_models.ipynb index dcf081a2..b37715e3 100644 --- a/docs/tutorials/navigate_models.ipynb +++ b/docs/tutorials/navigate_models.ipynb @@ -4,7 +4,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "모델 주는 FuriosaAI NPU를 사용해서 가속할 수 있는 모델들을 쉽게 사용할 수 있도록 모아놓은 프로젝트입니다. 먼저, 사용 가능한 모델들을 살펴보겠습니다." + "# Navigating Models from FuriosaAI Model Zoo\n", + "\n", + "## FuriosaAI's Software Stack\n", + "\n", + "FuriosaAI's software stack caters to a diverse range of deep learning models, with a primary focus on vision-related tasks. Within this stack, the FuriosaAI Compiler optimizes Deep Neural Network (DNN) models and generates executable code for the FuriosaAI NPU. It currently supports TFLite and ONNX models, utilizing the latest research and methods for optimization. The compiler efficiently accelerates various vision-related operators on the NPU while utilizing the CPU for unsupported operations.\n", + "\n", + "## Vision Models and Beyond\n", + "\n", + "FuriosaAI's first-generation NPU, Warboy, is specialized for vision-related tasks. It accelerates popular vision models like ResNet50, SSD-MobileNet, and EfficientNet, while also enabling users to create custom models that utilize supported operators. This flexibility ensures the generation of highly optimized NPU-ready code for various vision tasks.\n", + "\n", + "## Exploring Vision Models\n", + "\n", + "For easy exploration of vision models tailored for FuriosaAI's NPU, navigate to the `furiosa.models.vision` module. 
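+    ,
+    "\n",
+    "As a quick illustrative sketch (an editorial addition, not part of the upstream notebook), a model class can also be looked up by name programmatically:\n",
+    "\n",
+    "```python\n",
+    "from furiosa.models import vision\n",
+    "\n",
+    "# Resolve a few of the model classes listed above by their names.\n",
+    "for name in (\"ResNet50\", \"SSDMobileNet\", \"YOLOv5l\"):\n",
+    "    model_cls = getattr(vision, name)\n",
+    "    print(name, \"->\", model_cls)\n",
+    "```\n"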
Here, you'll find a curated selection of models that have been optimized for efficient deployment on the FuriosaAI Warboy NPU.\n" ] }, { @@ -24,34 +36,17 @@ "from furiosa.models import vision\n", "\n", "\n", + "# List of available vision models\n", "print(dir(vision))" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-----------------+------------------------------+----------------------+-------------------------+\n", - "| Model name | Model description | Task type | Available postprocesses |\n", - "+-----------------+------------------------------+----------------------+-------------------------+\n", - "| ResNet50 | MLCommons ResNet50 model | Image Classification | Python |\n", - "| SSDMobileNet | MLCommons MobileNet v1 model | Object Detection | Python, Rust |\n", - "| SSDResNet34 | MLCommons SSD ResNet34 model | Object Detection | Python, Rust |\n", - "| YOLOv5l | YOLOv5 Large model | Object Detection | Rust |\n", - "| YOLOv5m | YOLOv5 Medium model | Object Detection | Rust |\n", - "| EfficientNetB0 | EfficientNet B0 model | Image Classification | Python |\n", - "| EfficientNetV2s | EfficientNetV2-s model | Image Classification | Python |\n", - "+-----------------+------------------------------+----------------------+-------------------------+\n" - ] - } - ], + "outputs": [], "source": [ - "# Or you can use the Command line tool\n", + "# Alternatively, use the Command line tool to list models\n", "! furiosa-models list" ] }, @@ -59,7 +54,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "그 중 하나인 ResNet50의 인스턴스를 만들고 `Model` 객체에 관해 자세히 알아보겠습니다." + "| Model name | Model description | Task type | Available postprocesses |\n", + "|-----------------|------------------------------|----------------------|-------------------------|\n", + "| ResNet50 | MLCommons ResNet50 model | Image Classification | Python |\n", + "| SSDMobileNet | MLCommons MobileNet v1 model | Object Detection | Python, Rust |\n", + "| SSDResNet34 | MLCommons SSD ResNet34 model | Object Detection | Python, Rust |\n", + "| YOLOv5l | YOLOv5 Large model | Object Detection | Rust |\n", + "| YOLOv5m | YOLOv5 Medium model | Object Detection | Rust |\n", + "| EfficientNetB0 | EfficientNet B0 model | Image Classification | Python |\n", + "| EfficientNetV2s | EfficientNetV2-s model | Image Classification | Python |" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's instantiate a Model class from vision models and delve deeper into its attributes." ] }, { @@ -81,7 +91,10 @@ "model = vision.ResNet50()\n", "print(model)\n", "\n", + "# Display the static fields of the model\n", "print(\"Static fields:\", list(model.model_fields.keys()))\n", + "\n", + "# Show the lazy-loaded fields of the model\n", "print(\"Lazy loaded fields:\", list(model.model_computed_fields.keys()))" ] }, @@ -94,6 +107,7 @@ "name": "stdout", "output_type": "stream", "text": [ + "libfuriosa_hal.so --- v0.11.0, built @ 43c901f\n", "name: ResNet50\n", "format: ONNX\n", "family: ResNet\n", @@ -108,7 +122,7 @@ } ], "source": [ - "# You can see the static fields in Command line tool too\n", + "# Moreover, you can access informative static fields using the Command line tool:\n", "! 
furiosa-models desc ResNet50" ] }, @@ -116,16 +130,74 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Actually there's one more hidden field (or, a method)" + "## Acquire the ENF Binary with `model_source()`\n", + "\n", + "FuriosaAI's Model object offers a method called `model_source()` which allows you to obtain the ENF (FuriosaAI Compiler-specific format) binary for a specific model. This ENF binary can be directly used for further processing or deployment without the need for recompilation. This is particularly beneficial when you want to save time and resources associated with the compilation process.\n", + "\n", + "Using `model_source()` is straightforward. You call this method on a Model object and, as a result, you receive the ENF binary. The `num_pe` parameter, which has a default value of 2, specifies the number of processing elements (PE) to use. You can set it to 1 if you want to use a single PE for the model. This flexibility allows you to optimize the model's deployment according to your specific requirements, whether it's for single-PE or fusioned-PE scenarios.\n", + "\n", + "Here's an example of how to use `model_source()`:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "libfuriosa_hal.so --- v0.11.0, built @ 43c901f\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m2023-10-27T07:37:13.530567Z\u001b[0m \u001b[33m WARN\u001b[0m \u001b[2mfuriosa_compiler::api\u001b[0m\u001b[2m:\u001b[0m furiosa-compiler and the dependent IR may be not compatible: (0.10.0#f8f05c8ea != 0.10.1#8b00177dc)\n", + "\u001b[2m2023-10-27T07:37:13.549259Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.2, rev: a45bb1a0b, built at: 2023-10-12T06:41:21Z) bootstrapping ...\n", + "\u001b[2m2023-10-27T07:37:13.553584Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n", + "\u001b[2m2023-10-27T07:37:13.553612Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n", + "\u001b[2m2023-10-27T07:37:13.553622Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] detected 1 NPU device(s):\n", + "\u001b[2m2023-10-27T07:37:13.562958Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:5:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.3276053508, f7b0f28)\n", + "\u001b[2m2023-10-27T07:37:13.563294Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] started\n", + "\u001b[2m2023-10-27T07:37:16.818533Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20231027163716-6dffk3.log\n", + "\u001b[2m2023-10-27T07:37:16.818838Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] created Sess-cf0fc5ce using npu:5:0-1\n", + "\u001b[2m2023-10-27T07:37:16.831883Z\u001b[0m \u001b[32m INFO\u001b[0m 
\u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-cf0fc5ce] compiling the model (target: warboy-b0-2pe, 128dpes, size: 57.1 MB)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":-) Finished in 0.000006756s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m2023-10-27T07:37:20.186572Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-cf0fc5ce] the model compile is successful (took 3 secs)\n", + "\u001b[2m2023-10-27T07:37:20.553656Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] created 1 NPU threads on npu:5:0-1 (DRAM: 180.0 kiB/16.0 GiB, SRAM: 31.4 MiB/128.0 MiB)\n", + "[TensorDesc(shape=(1, 3, 224, 224), dtype=UINT8, format=NCHW, size=150528, len=150528)]\n", + "\u001b[2m2023-10-27T07:37:20.679167Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-cf0fc5ce] terminated\n", + "\u001b[2m2023-10-27T07:37:20.684148Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:5:0-1) has been closed\n", + "\u001b[2m2023-10-27T07:37:20.689574Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] stopped\n" + ] + } + ], "source": [ - "_ = model.model_source(num_pe=2)" + "from furiosa.runtime.sync import create_runner\n", + "\n", + "model_source = model.model_source(num_pe=2)\n", + "\n", + "# Create a runner with the model source\n", + "with create_runner(model_source) as runner:\n", + " # Print model inputs metadata\n", + " print(runner.model.inputs())\n", + " # Run inferences, ...\n", + " ..." 
] } ], diff --git a/docs/tutorials/serving_with_furiosa_serving.md b/docs/tutorials/serving_with_furiosa_serving.md index 93cd01fe..99cfa598 100644 --- a/docs/tutorials/serving_with_furiosa_serving.md +++ b/docs/tutorials/serving_with_furiosa_serving.md @@ -1,4 +1,4 @@ -# Serving with furiosa-serving +# Serving Example with furiosa-serving ```python --8<-- "docs/examples/serving.py" diff --git a/mkdocs.yml b/mkdocs.yml index 7b7970ed..f9f4ba14 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -22,7 +22,6 @@ nav: - getting_started.md - Tutorials: - tutorials/navigate_models.ipynb - - tutorials/collect_calibration_ranges.ipynb - tutorials/quantize_and_compile_model.ipynb - tutorials/serving_with_furiosa_serving.md - model_object.md From e159b9fe1680cb31e1ad8dbd7dd96869ee3c83cf Mon Sep 17 00:00:00 2001 From: Myeong-geun Shin Date: Sat, 28 Oct 2023 17:26:15 +0900 Subject: [PATCH 15/18] ci: fix fnp version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c1b2a19d..0a94693a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "furiosa-common == 0.10.*", "furiosa-tools == 0.10.*", "furiosa-runtime == 0.10.*", - "furiosa-native-postprocess == 0.9.0_dev0", + "furiosa-native-postprocess == 0.9.0", "opencv-python-headless", "torch", From f771d983ce57a880d327e23decb087209b617338 Mon Sep 17 00:00:00 2001 From: Myeong-geun Shin Date: Tue, 31 Oct 2023 16:36:55 +0900 Subject: [PATCH 16/18] Apply simple review comments --- docs/model_object.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/model_object.md b/docs/model_object.md index d32bc98f..e7482bb5 100644 --- a/docs/model_object.md +++ b/docs/model_object.md @@ -23,11 +23,16 @@ Once the model images are fetched, they will be cached on your local disk. A Model object encompasses model artifacts, such as ONNX, TFLite, mapping from a tensor name to the tensor's min and max, and ENF. -The ENF format is specific to the FuriosaAI Compiler. +ENF is the serialization format of a compiled binary used in Furiosa SDK. Once you have the ENF file, you can reuse it to skip the compilation process, which can take up to several minutes. You can acquire the ENF binary from the model_source() method. In addition, a Model object contains various metadata attributes. +!!!Info + If you want to learn more about the ENF please visit + [Furiosa SDK - Compiler - Using ENF files](https://furiosa-ai.github.io/docs/latest/en/software/compiler.html#using-enf-files) + + ::: furiosa.models.types.Model options: show_root_heading: true @@ -37,7 +42,7 @@ In addition, a Model object contains various metadata attributes. ## Inferencing with Runner API -To create a Runner, pass the ENF binary obtained from the `model_source()` method of the model object to the `furiosa.runtime.sync.create_runner` function. If you prefer an asynchronous Runner, you can use the `furiosa.runtime.create_runner` function instead. Passing the pre-compiled ENF binary allows you to perform inference directly without the compilation process. Alternatively, you can also manually quantize and compile the original f32 model with the provided calibration range. +To create a Runner, you need to pass the ENF binary obtained from the `model_source()` method of the model object to the `furiosa.runtime.sync.create_runner` function. If you prefer an asynchronous Runner, you can use the `furiosa.runtime.create_runner` function instead. 
Passing the pre-compiled ENF binary allows you to perform inference directly without the compilation process. Alternatively, you can also manually quantize and compile the original f32 model with the provided calibration range. !!!Info @@ -51,7 +56,7 @@ Passing `Model.origin` to `create_runner()` allows users to start from source mo To work with f32 source models, calibration and quantization are essential steps. You can access pre-calibrated data directly from furiosa-models, simplifying the quantization process. -If you prefer manual quantization of the model, you can install the `furiosa-quantizer` package, available at this [package link](https://furiosa-ai.github.io/docs/latest/en/software/python-sdk.html#quantizer). +If you prefer a manual quantization step for a model, you can install the `furiosa-quantizer` package, available at this [package link](https://furiosa-ai.github.io/docs/latest/en/software/python-sdk.html#quantizer). The `tensor_name_to_range` field within the model class contains this pre-calibrated data. Upon quantization, the output will be in FuriosaAI's Intermediate Representation (IR) format, which can then be passed to the Runner. At this stage, you have the option to specify the compiler configuration. From 100f66acb4ea634cf6a1246dde231f8940c1d88c Mon Sep 17 00:00:00 2001 From: Myeong-geun Shin Date: Thu, 2 Nov 2023 16:04:42 +0900 Subject: [PATCH 17/18] docs: add details about quantize, & serving --- docs/examples/serving.py | 2 +- docs/tutorials/navigate_models.ipynb | 23 +-- .../quantize_and_compile_model.ipynb | 169 +++--------------- .../tutorials/serving_with_furiosa_serving.md | 92 +++++++++- 4 files changed, 114 insertions(+), 172 deletions(-) diff --git a/docs/examples/serving.py b/docs/examples/serving.py index ee4d02f3..f79dec7f 100644 --- a/docs/examples/serving.py +++ b/docs/examples/serving.py @@ -40,6 +40,6 @@ async def infer(image: UploadFile = File(...)) -> Dict[str, str]: return {"result": response} +# Run the server if current Python script is called directly if __name__ == "__main__": - # Run the server if current Python script is called directly uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/docs/tutorials/navigate_models.ipynb b/docs/tutorials/navigate_models.ipynb index b37715e3..d790bd20 100644 --- a/docs/tutorials/navigate_models.ipynb +++ b/docs/tutorials/navigate_models.ipynb @@ -151,22 +151,6 @@ "libfuriosa_hal.so --- v0.11.0, built @ 43c901f\n" ] }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2m2023-10-27T07:37:13.530567Z\u001b[0m \u001b[33m WARN\u001b[0m \u001b[2mfuriosa_compiler::api\u001b[0m\u001b[2m:\u001b[0m furiosa-compiler and the dependent IR may be not compatible: (0.10.0#f8f05c8ea != 0.10.1#8b00177dc)\n", - "\u001b[2m2023-10-27T07:37:13.549259Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.2, rev: a45bb1a0b, built at: 2023-10-12T06:41:21Z) bootstrapping ...\n", - "\u001b[2m2023-10-27T07:37:13.553584Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n", - "\u001b[2m2023-10-27T07:37:13.553612Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n", - "\u001b[2m2023-10-27T07:37:13.553622Z\u001b[0m \u001b[32m 
INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] detected 1 NPU device(s):\n", - "\u001b[2m2023-10-27T07:37:13.562958Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:5:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.3276053508, f7b0f28)\n", - "\u001b[2m2023-10-27T07:37:13.563294Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] started\n", - "\u001b[2m2023-10-27T07:37:16.818533Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20231027163716-6dffk3.log\n", - "\u001b[2m2023-10-27T07:37:16.818838Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] created Sess-cf0fc5ce using npu:5:0-1\n", - "\u001b[2m2023-10-27T07:37:16.831883Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-cf0fc5ce] compiling the model (target: warboy-b0-2pe, 128dpes, size: 57.1 MB)\n" - ] - }, { "name": "stderr", "output_type": "stream", @@ -178,12 +162,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[2m2023-10-27T07:37:20.186572Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-cf0fc5ce] the model compile is successful (took 3 secs)\n", - "\u001b[2m2023-10-27T07:37:20.553656Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] created 1 NPU threads on npu:5:0-1 (DRAM: 180.0 kiB/16.0 GiB, SRAM: 31.4 MiB/128.0 MiB)\n", - "[TensorDesc(shape=(1, 3, 224, 224), dtype=UINT8, format=NCHW, size=150528, len=150528)]\n", - "\u001b[2m2023-10-27T07:37:20.679167Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-cf0fc5ce] terminated\n", - "\u001b[2m2023-10-27T07:37:20.684148Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:5:0-1) has been closed\n", - "\u001b[2m2023-10-27T07:37:20.689574Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] stopped\n" + "[TensorDesc(shape=(1, 3, 224, 224), dtype=UINT8, format=NCHW, size=150528, len=150528)]\n" ] } ], diff --git a/docs/tutorials/quantize_and_compile_model.ipynb b/docs/tutorials/quantize_and_compile_model.ipynb index fc9687d5..4fb1c1e0 100644 --- a/docs/tutorials/quantize_and_compile_model.ipynb +++ b/docs/tutorials/quantize_and_compile_model.ipynb @@ -4,13 +4,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Quantize and compiling Models\n", + "# Quantize and Compile Models\n", "\n", - "Related fields:\n", - "- tensor_name_to_range\n", - "- origin\n", + "Furiosa Model Zoo provides pre-compiled binaries that can be used directly with the NPU. However, we also offer the original model files and related metadata to allow for the application of different compiler options and calibration methods. 
In this document, we will explore the usage of the following two fields within the Model object:\n",
+    "- `tensor_name_to_range`\n",
+    "- `origin`\n",
     "\n",
-    "See the Model's input/output summary"
+    "To learn more about quantization and performance optimization, you can refer to the relevant SDK's documentation pages.\n",
+    "- [Furiosa SDK - Quantization](https://furiosa-ai.github.io/docs/latest/en/software/quantization.html).\n",
+    "- [Furiosa SDK - Model Optimization - Quantize](https://furiosa-ai.github.io/docs/latest/en/software/performance.html#optimizing-quantize-operator).\n",
+    "\n",
+    "Now, we will run the ResNet50 model without any further optimizations."
    ]
   },
   {
@@ -30,25 +34,12 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "First field of calibration ranges: ('input_tensor:0', (-123.5584560111165, 150.34208860248327))\n",
-      "\u001b[2m2023-08-28T01:28:05.078547Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...\n",
-      "\u001b[2m2023-08-28T01:28:05.084541Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n",
-      "\u001b[2m2023-08-28T01:28:05.084554Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n",
-      "\u001b[2m2023-08-28T01:28:05.084557Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] detected 1 NPU device(s):\n",
-      "\u001b[2m2023-08-28T01:28:05.094238Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:2:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.0, f7b0f28)\n",
-      "\u001b[2m2023-08-28T01:28:05.094462Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] started\n",
-      "\u001b[2m2023-08-28T01:28:09.761370Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20230828102809-v88fdx.log\n",
-      "\u001b[2m2023-08-28T01:28:09.761655Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] created Sess-08a8e56d using npu:2:0-1\n",
-      "\u001b[2m2023-08-28T01:28:09.773352Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-08a8e56d] compiling the model (target: warboy-b0-2pe, 128dpes, size: 102.2 MB)\n",
-      "\u001b[2m2023-08-28T01:28:12.780705Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-08a8e56d] the model compile is successful (took 3 secs)\n",
-      "\u001b[2m2023-08-28T01:28:13.227285Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] created 1 NPU threads on npu:2:0-1 (DRAM: 180.0 kiB/16.0 GiB, SRAM: 31.4 MiB/128.0 MiB)\n",
+      "Example field of calibration ranges: ('input_tensor:0', (-123.5584560111165, 150.34208860248327))\n",
       "Inputs:\n",
       "{0: TensorDesc(shape=(1, 3, 224, 224), dtype=FLOAT32, format=NCHW, size=602112, 
len=150528)}\n", "Outputs:\n", "{0: TensorDesc(shape=(1,), dtype=INT64, format=?, size=8, len=1)}\n", - "\u001b[2m2023-08-28T01:28:13.413781Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-08a8e56d] terminated\n", - "\u001b[2m2023-08-28T01:28:13.417749Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:2:0-1) has been closed\n", - "\u001b[2m2023-08-28T01:28:13.419580Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-0] stopped\n" + "Average inference time: 5.456097726011649 ms\n" ] } ], @@ -58,65 +49,19 @@ "from furiosa.runtime.sync import create_runner\n", "\n", "import onnx\n", + "import numpy as np\n", "\n", - "model = vision.ResNet50()\n", - "print(\"First field of calibration ranges:\", next(iter(model.tensor_name_to_range.items())))\n", + "from time import perf_counter\n", "\n", + "model = vision.ResNet50()\n", "f32_onnx_model = onnx.load_from_string(model.origin)\n", "quantized_onnx = quantize(f32_onnx_model, model.tensor_name_to_range)\n", "\n", - "with create_runner(quantized_onnx) as runner:\n", - " runner.model.print_summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run inferences with scaling" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2m2023-08-28T01:28:13.806497Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...\n", - "\u001b[2m2023-08-28T01:28:13.811804Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n", - "\u001b[2m2023-08-28T01:28:13.811809Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n", - "\u001b[2m2023-08-28T01:28:13.811811Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] detected 1 NPU device(s):\n", - "\u001b[2m2023-08-28T01:28:13.823402Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:2:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.0, f7b0f28)\n", - "\u001b[2m2023-08-28T01:28:13.823534Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] started\n", - "\u001b[2m2023-08-28T01:28:17.728906Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20230828102817-okpycl.log\n", - "\u001b[2m2023-08-28T01:28:17.729397Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] created Sess-541713a4 using npu:2:0-1\n", - "\u001b[2m2023-08-28T01:28:17.743437Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-541713a4] compiling the model (target: warboy-b0-2pe, 128dpes, size: 102.2 MB)\n", - 
"\u001b[2m2023-08-28T01:28:20.754124Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-541713a4] the model compile is successful (took 3 secs)\n", - "\u001b[2m2023-08-28T01:28:21.203223Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] created 1 NPU threads on npu:2:0-1 (DRAM: 180.0 kiB/16.0 GiB, SRAM: 31.4 MiB/128.0 MiB)\n", - "Inputs:\n", - "{0: TensorDesc(shape=(1, 3, 224, 224), dtype=FLOAT32, format=NCHW, size=602112, len=150528)}\n", - "Outputs:\n", - "{0: TensorDesc(shape=(1,), dtype=INT64, format=?, size=8, len=1)}\n", - "Average inference time: 5.614574640989304 ms\n", - "\u001b[2m2023-08-28T01:28:27.071158Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-541713a4] terminated\n", - "\u001b[2m2023-08-28T01:28:27.080271Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:2:0-1) has been closed\n", - "\u001b[2m2023-08-28T01:28:27.085561Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-1] stopped\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "from time import perf_counter\n", - "\n", - "\n", - "quantized_onnx = quantize(f32_onnx_model, model.tensor_name_to_range)\n", + "print(\"Example field of calibration ranges:\", next(iter(model.tensor_name_to_range.items())))\n", "\n", "with create_runner(quantized_onnx) as runner:\n", - " input_tensor_desc = runner.model.inputs()\n", " runner.model.print_summary()\n", + " input_tensor_desc = runner.model.inputs()\n", " fake_input = [\n", " np.asarray(np.random.randint(256, size=desc.shape), dtype=desc.dtype.numpy)\n", " for desc in input_tensor_desc\n", @@ -131,43 +76,27 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Run inferences without scaling (and quantize).\n", - "\n", + "According to \n", + "[performance tuning guide](https://furiosa-ai.github.io/docs/latest/ko/software/performance.html#quantize), we can remove input tensors' quantize operator to optimize the model.\n", "\n", - "See [performance tuning guide](https://furiosa-ai.github.io/docs/latest/ko/software/performance.html#quantize) for more details.\n", "\n", - "\n", - "Please note that input data type has been changed" + "Please note that input tensors' data type has been changed from float32 to unsigned int 8." 
] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[2m2023-08-28T01:28:27.838330Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...\n", - "\u001b[2m2023-08-28T01:28:27.844387Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n", - "\u001b[2m2023-08-28T01:28:27.844399Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n", - "\u001b[2m2023-08-28T01:28:27.844403Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] detected 1 NPU device(s):\n", - "\u001b[2m2023-08-28T01:28:27.854235Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:2:0-1 (warboy-b0-2pe, 128dpes, firmware: 1.7.0, f7b0f28)\n", - "\u001b[2m2023-08-28T01:28:27.854453Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] started\n", - "\u001b[2m2023-08-28T01:28:32.712311Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20230828102832-nwtxhl.log\n", - "\u001b[2m2023-08-28T01:28:32.712548Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] created Sess-78e2493d using npu:2:0-1\n", - "\u001b[2m2023-08-28T01:28:32.733493Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-78e2493d] compiling the model (target: warboy-b0-2pe, 128dpes, size: 102.2 MB)\n", - "\u001b[2m2023-08-28T01:28:35.824911Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-78e2493d] the model compile is successful (took 3 secs)\n", - "\u001b[2m2023-08-28T01:28:36.227750Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] created 1 NPU threads on npu:2:0-1 (DRAM: 180.0 kiB/16.0 GiB, SRAM: 31.4 MiB/128.0 MiB)\n", "Inputs:\n", "{0: TensorDesc(shape=(1, 3, 224, 224), dtype=UINT8, format=NCHW, size=150528, len=150528)}\n", "Outputs:\n", "{0: TensorDesc(shape=(1,), dtype=INT64, format=?, size=8, len=1)}\n", - "Average inference time: 2.5746346139349043 ms\n", - "\u001b[2m2023-08-28T01:28:39.026687Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-78e2493d] terminated\n", - "\u001b[2m2023-08-28T01:28:39.036203Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:2:0-1) has been closed\n", - "\u001b[2m2023-08-28T01:28:39.041580Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-2] stopped\n" + "Average inference time: 2.715405730996281 ms\n" ] } ], @@ -194,62 +123,6 @@ " runner.run(fake_input)\n", 
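+    "    # Note: perf_counter() - starting_time is the total elapsed time in seconds\n",
+    "    # over 1,000 runs, which is numerically equal to the average per-run time in ms.\n",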
" print(\"Average inference time:\", perf_counter() - starting_time, \"ms\")" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can compile & use only 1pe" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2m2023-08-28T01:28:39.079503Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m FuriosaRT (v0.10.0, rev: ac1a04a8d, built at: 2023-08-08T12:15:46Z) bootstrapping ...\n", - "\u001b[2m2023-08-28T01:28:39.084505Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found furiosa-compiler (v0.10.0, rev: f8f05c8, built at: 2023-08-08T11:58:09Z)\n", - "\u001b[2m2023-08-28T01:28:39.084523Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m Found libhal (type: warboy, v0.11.0, rev: 43c901f built at: 2023-08-08T12:07:35Z)\n", - "\u001b[2m2023-08-28T01:28:39.084529Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] detected 1 NPU device(s):\n", - "\u001b[2m2023-08-28T01:28:39.094433Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m - [0] npu:2:0 (warboy-b0, 64dpes, firmware: 1.7.0, f7b0f28)\n", - "\u001b[2m2023-08-28T01:28:39.094599Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] started\n", - "\u001b[2m2023-08-28T01:28:44.681264Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa::runtime\u001b[0m\u001b[2m:\u001b[0m Saving the compilation log into /root/.local/state/furiosa/logs/compiler-20230828102844-v2bdzh.log\n", - "\u001b[2m2023-08-28T01:28:44.681540Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] created Sess-660528cf using npu:2:0\n", - "\u001b[2m2023-08-28T01:28:44.693511Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-660528cf] compiling the model (target: warboy-b0, 64dpes, size: 102.2 MB)\n", - "\u001b[2m2023-08-28T01:28:47.133498Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-660528cf] the model compile is successful (took 2 secs)\n", - "\u001b[2m2023-08-28T01:28:47.336202Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] created 1 NPU threads on npu:2:0 (DRAM: 14.0 MiB/16.0 GiB, SRAM: 16.0 MiB/64.0 MiB)\n", - "Inputs:\n", - "{0: TensorDesc(shape=(1, 3, 224, 224), dtype=UINT8, format=NCHW, size=150528, len=150528)}\n", - "Outputs:\n", - "{0: TensorDesc(shape=(1,), dtype=INT64, format=?, size=8, len=1)}\n", - "Average inference time: 2.743666300084442 ms\n", - "\u001b[2m2023-08-28T01:28:50.269460Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Sess-660528cf] terminated\n", - "\u001b[2m2023-08-28T01:28:50.279019Z\u001b[0m \u001b[32m INFO\u001b[0m \u001b[2mfuriosa_rt_core::npu::raw\u001b[0m\u001b[2m:\u001b[0m NPU (npu:2:0) has been closed\n", - "\u001b[2m2023-08-28T01:28:50.284263Z\u001b[0m \u001b[32m INFO\u001b[0m 
\u001b[2mfuriosa_rt_core::driver::event_driven::coord\u001b[0m\u001b[2m:\u001b[0m [Runtime-3] stopped\n"
-    ]
-   }
-  ],
-  "source": [
-    "from furiosa.runtime.sync import Runtime\n",
-    "\n",
-    "\n",
-    "with Runtime(device=\"warboy(1)*1\") as runtime:\n",
-    "    with runtime.create_runner(quantized_onnx_wo_input_quantize) as runner:\n",
-    "        input_tensor_desc = runner.model.inputs()\n",
-    "        runner.model.print_summary()\n",
-    "        fake_input = [\n",
-    "            np.random.randint(256, size=desc.shape, dtype=desc.dtype.numpy)\n",
-    "            for desc in input_tensor_desc\n",
-    "        ]\n",
-    "        starting_time = perf_counter()\n",
-    "        for _ in range(1000):\n",
-    "            runner.run(fake_input)\n",
-    "        print(\"Average inference time:\", perf_counter() - starting_time, \"ms\")"
-   ]
-  }
 ],
 "metadata": {
diff --git a/docs/tutorials/serving_with_furiosa_serving.md b/docs/tutorials/serving_with_furiosa_serving.md
index 99cfa598..35cf7dad 100644
--- a/docs/tutorials/serving_with_furiosa_serving.md
+++ b/docs/tutorials/serving_with_furiosa_serving.md
@@ -1,4 +1,94 @@
-# Serving Example with furiosa-serving
+# Serving Example with Furiosa Serving
+
+Furiosa Serving is a lightweight library based on [FastAPI](https://fastapi.tiangolo.com/) that allows you to run a model server on a Furiosa NPU.
+
+For more information about Furiosa Serving, you can visit the [package page](https://pypi.org/project/furiosa-serving/).
+
+## Getting Started
+
+To get started with Furiosa Serving, you'll need to install the [furiosa-serving library](https://pypi.org/project/furiosa-serving/), create a `ServeAPI` instance (a `FastAPI` wrapper), and set up your model for serving.
+In this example, we'll walk you through the steps to create a simple ResNet50 server.
+
+First, import the necessary modules and initialize a FastAPI app:
+
+```python
+from tempfile import NamedTemporaryFile
+from typing import Dict, List
+
+from fastapi import FastAPI, File, UploadFile
+import numpy as np
+import uvicorn
+
+from furiosa.common.thread import synchronous
+from furiosa.models import vision
+from furiosa.serving import ServeAPI, ServeModel
+
+serve = ServeAPI()
+app: FastAPI = serve.app
+```
+
+## Model Initialization
+
+Next, you can initialize a vision model, such as ResNet50, for serving:
+
+```python
+resnet50 = vision.ResNet50()
+
+model_file = NamedTemporaryFile()
+model_file.write(resnet50.model_source())
+model_file_path = model_file.name
+model: ServeModel = synchronous(serve.model("furiosart"))(
+    'ResNet50', location=model_file_path
+)
+```
+
+!!!note
+    ServeModel does not support in-memory model binaries for now. Instead, write the model into a temporary file and pass its path, as shown in the example above.
+
+## Model Inference
+
+Now that you have your FastAPI app and model set up, you can define an endpoint for model inference. 
In this example, we create an endpoint that accepts an image file and performs inference using ResNet50:
+
+```python
+@model.post("/infer")
+async def infer(image: UploadFile = File(...)) -> Dict[str, str]:
+    # Model Zoo's preprocess functions do not accept in-memory image files for now,
+    # so we write the in-memory image into a temporary file and pass its path
+    image_file = NamedTemporaryFile()
+    image_file.write(await image.read())
+
+    tensors, _ctx = resnet50.preprocess(image_file.name)
+
+    # Infer from ServeModel
+    result: List[np.ndarray] = await model.predict(tensors)
+
+    response: str = resnet50.postprocess(result)
+
+    return {"result": response}
+```
+
+## Running the Server
+
+Finally, you can run the FastAPI server using [uvicorn](https://www.uvicorn.org/):
+
+```python
+# Run the server if the current Python script is called directly
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+```
+
+Alternatively, you can run the [uvicorn](https://www.uvicorn.org/) server via the internal `app` variable of the `ServeAPI` instance, just like a [normal FastAPI application](https://fastapi.tiangolo.com/tutorial/first-steps/#first-steps):
+
+```shell
+$ uvicorn main:app # or uvicorn main:serve.app
+```
+
+This example demonstrates the basic setup of a FastAPI server with Furiosa Serving for model inference. You can extend this example to add more functionality to your server as needed.
+
+For more information and advanced usage of Furiosa Serving, please refer to the [Furiosa Serving documentation](https://pypi.org/project/furiosa-serving/).
+
+
+You can find the full code example below:
 
 ```python
 --8<-- "docs/examples/serving.py"

From c1aca86acbb0a987faf094a69e4d076ba6a60969 Mon Sep 17 00:00:00 2001
From: Myeong-geun Shin
Date: Mon, 6 Nov 2023 12:35:03 +0900
Subject: [PATCH 18/18] docs: Capitalize documentations' titles

---
 README.md                | 4 ++--
 docs/developers-guide.md | 2 +-
 docs/model_object.md     | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 44e2c49a..2d02424c 100644
--- a/README.md
+++ b/README.md
@@ -20,9 +20,9 @@ You can also find the latest online documents, including programming
 guides, API references, and examples from the followings:
 
 * [Furiosa Models - Latest Documentation](https://furiosa-ai.github.io/furiosa-models/latest/)
-* [Model object](https://furiosa-ai.github.io/furiosa-models/v0.10.0/model_object/)
+* [Model Object](https://furiosa-ai.github.io/furiosa-models/v0.10.0/model_object/)
 * [Model List](https://furiosa-ai.github.io/furiosa-models/v0.10.0/#model_list)
-* [Command Tool](https://furiosa-ai.github.io/furiosa-models/v0.10.0/command_line_tool/)
+* [Command Line Tool](https://furiosa-ai.github.io/furiosa-models/v0.10.0/command_line_tool/)
 * [Furiosa SDK - Tutorial and Code Examples](https://furiosa-ai.github.io/docs/v0.10.0/en/software/tutorials.html)
 
 
diff --git a/docs/developers-guide.md b/docs/developers-guide.md
index a514eb74..afd01d1c 100644
--- a/docs/developers-guide.md
+++ b/docs/developers-guide.md
@@ -1,4 +1,4 @@
-# Developer's guide
+# Developer's Guide
 
 This documentation is for developers who want to contribute to Furiosa Models.
 
diff --git a/docs/model_object.md b/docs/model_object.md
index e7482bb5..9a76b2f3 100644
--- a/docs/model_object.md
+++ b/docs/model_object.md
@@ -1,4 +1,4 @@
-# Model object
+# Model Object
 
 In the `furiosa-models` project, the `Model` is the primary class object, representing a neural network model.
This document elucidates the offerings and uses of the [`Model`][furiosa.models.types.Model] object.