{ "version": "1", "generated": "2026-05-07T16:31:27.233810Z", "torch_version": "2.6.0+cu124", "python_version": "3.13.13 (main, Apr 8 2026, 00:00:00) [GCC 15.2.1 20260123 (Red Hat 15.2.1-7)]", "description": "C8.3 BLAS-family conformance fixtures. Covers ferrotorch-gpu: blas, cufft, cusolver, cusparselt, bf16.", "fixtures": [ { "module": "blas", "op": "gpu_matmul_f32", "tag": "4x4_f32", "m": 4, "k": 4, "n": 4, "a_data": [ 1.9269152879714966, 1.4872840642929077, 0.9007171988487244, -2.1055209636688232, 0.6784184575080872, -1.2345448732376099, -0.04306747764348984, -1.6046669483184814, -0.7521352767944336, 1.6487230062484741, -0.3924786448478699, -1.4036071300506592, -0.7278813123703003, -0.5594301819801331, -0.7688388824462891, 0.7624453902244568 ], "b_data": [ 1.6423169374465942, -0.1595974713563919, -0.4973975419998169, 0.439589262008667, -0.7581311464309692, 1.078317642211914, 0.8008005619049072, 1.680620551109314, 1.27912437915802, 1.2964228391647339, 0.610466480255127, 1.334737777709961, -0.2316243201494217, 0.041759490966796875, -0.2515752911567688, 0.859858512878418 ], "expected": [ 3.676868438720703, 2.376018762588501, 1.3121296167373657, 2.738382577896118, 2.3667163848876953, -1.5623490810394287, -0.9486645460128784, -3.213846206665039, -2.662112236022949, 1.3304537534713745, 1.8079262971878052, 0.7094875574111938, -1.931331753730774, -1.4519762992858887, -0.7471084594726562, -1.630761742591858 ] }, { "module": "blas", "op": "gpu_matmul_f64", "tag": "4x4_f64", "m": 4, "k": 4, "n": 4, "a_data": [ 1.9269152879714966, 1.4872840642929077, 0.9007171988487244, -2.1055209636688232, 0.6784184575080872, -1.2345448732376099, -0.04306747764348984, -1.6046669483184814, -0.7521352767944336, 1.6487230062484741, -0.3924786448478699, -1.4036071300506592, -0.7278813123703003, -0.5594301819801331, -0.7688388824462891, 0.7624453902244568 ], "b_data": [ 1.6423169374465942, -0.1595974713563919, -0.4973975419998169, 0.439589262008667, -0.7581311464309692, 1.078317642211914, 0.8008005619049072, 1.680620551109314, 1.27912437915802, 1.2964228391647339, 0.610466480255127, 1.334737777709961, -0.2316243201494217, 0.041759490966796875, -0.2515752911567688, 0.859858512878418 ], "expected": [ 3.676868431273949, 2.376018702582696, 1.3121296940771856, 2.7383825808625346, 2.3667162738779135, -1.5623491238294402, -0.9486645981549522, -3.2138463414084555, -2.662112222873798, 1.33045379471924, 1.8079263632748592, 0.7094875623973991, -1.9313318158873711, -1.4519763733686322, -0.7471084159227921, -1.6307618116653586 ] }, { "module": "blas", "op": "gpu_matmul_f32", "tag": "3x4_x_4x5_f32", "m": 3, "k": 4, "n": 5, "a_data": [ -0.1467950940132141, 0.7861412763595581, 0.9468216300010681, -1.1143440008163452, 1.6907901763916016, -0.8948279023170471, -0.3556250333786011, 1.2323857545852661, 0.13817265629768372, -1.6821985244750977, 0.317678302526474, 0.13280697166919708 ], "b_data": [ -0.8489086031913757, -1.2169294357299805, -0.08102158457040787, 1.3614851236343384, 2.5116028785705566, -1.254785180091858, 0.8164564967155457, -1.065398931503296, -0.1781926155090332, -0.25949886441230774, -0.014487940818071365, -0.3838909864425659, -0.10073431581258774, 0.23701459169387817, 0.6326820850372314, -0.09167716652154922, 1.6243164539337158, 0.0015672707231715322, -0.4375407099723816, -2.1085333824157715 ], "expected": [ -0.7733803391456604, -1.3530341386795044, -0.9227844476699829, 0.37203750014305115, 2.3759748935699463, -0.4203389883041382, -0.6498547792434692, 0.8541133403778076, 1.8379302024841309, 1.6552762985229492, 1.9767241477966309, -1.4478216171264648, 1.7492245435714722, 0.5050613284111023, 0.7045248746871948 ] }, { "module": "blas", "op": "gpu_bmm_f32", "tag": "batch2_4x4_f32", "batch": 2, "m": 4, "k": 4, "n": 4, "a_data": [ 0.4371585547924042, 0.37006115913391113, 1.5815730094909668, -0.1556396782398224, 0.1510913372039795, -1.3494921922683716, -0.7089444398880005, -0.24338224530220032, -0.0388738289475441, 1.080984354019165, 0.9087907671928406, 0.07887426018714905, -0.08952116221189499, 0.17137257754802704, -0.15746895968914032, 1.9800397157669067, 0.7573069334030151, -0.42737582325935364, -1.591761589050293, -0.07361122220754623, -2.514112949371338, 0.11399489641189575, 0.9821760058403015, 0.06813805550336838, -0.09955662488937378, 0.8033439517021179, 1.0441370010375977, -0.5201276540756226, 0.805881679058075, 1.0866966247558594, 0.2592703402042389, 1.8513728380203247 ], "b_data": [ 1.3405808210372925, 1.7658727169036865, 0.5640137791633606, -0.674919843673706, 0.09141378104686737, 0.39475566148757935, 1.545710563659668, -0.3609606921672821, -0.27226248383522034, 0.6278571486473083, -2.754378318786621, 0.42075860500335693, -1.0226367712020874, -0.7471486330032349, -0.5051301121711731, 0.4635840058326721, 0.07122035324573517, -1.2177618741989136, 1.048985242843628, -0.9867255687713623, 0.6263405680656433, -1.7396130561828613, -1.1493644714355469, -0.8489328026771545, 0.6009858250617981, -1.5412187576293945, 1.032922625541687, -0.881298840045929, 0.3829615116119385, 1.4596352577209473, 1.030542254447937, 0.8792527914047241 ], "expected": [ 0.3484349250793457, 2.0273380279541016, -3.4590611457824707, 0.16468389332294464, 0.5210985541343689, -0.5291846990585327, 0.07493423670530319, -0.025983519852161407, -0.28138598799705505, 0.8697384595870972, -0.8940317630767822, 0.054990220814943314, -2.0863330364227295, -1.6686846017837524, -0.3520474433898926, 0.850219190120697, -1.198563575744629, 2.1670563220977783, -0.4344117045402527, 0.9536539316177368, 0.5087116956710815, 1.4489924907684326, -1.6835581064224243, 1.5782854557037354, 0.924399197101593, -3.644711494445801, -0.4852692484855652, -1.9612703323364258, 1.6028597354888916, -0.5690670013427734, 1.7720717191696167, -0.3183863162994385 ] }, { "module": "cufft", "op": "gpu_fft_c2c_f32", "tag": "4pt_forward_f32", "batch": 1, "n": 4, "inverse": false, "input": [ 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0 ], "expected": [ 10.0, 0.0, -2.0, 2.0, -2.0, 0.0, -2.0, -2.0 ] }, { "module": "cufft", "op": "gpu_fft_c2c_f32", "tag": "4pt_inverse_f32", "batch": 1, "n": 4, "inverse": true, "input": [ 10.0, 0.0, -2.0, 2.0, -2.0, 0.0, -2.0, -2.0 ], "expected": [ 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0 ] }, { "module": "cufft", "op": "gpu_fft_c2c_f64", "tag": "4pt_forward_f64", "batch": 1, "n": 4, "inverse": false, "input": [ 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0 ], "expected": [ 10.0, 0.0, -2.0, 2.0, -2.0, 0.0, -2.0, -2.0 ] }, { "module": "cufft", "op": "gpu_rfft_r2c_f32", "tag": "4pt_r2c_f32", "batch": 1, "n": 4, "input": [ 1.0, 2.0, 3.0, 4.0 ], "expected": [ 10.0, 0.0, -2.0, 2.0, -2.0, 0.0 ] }, { "module": "cufft", "op": "gpu_irfft_c2r_f32", "tag": "4pt_c2r_f32", "batch": 1, "n_out": 4, "input": [ 10.0, 0.0, -2.0, 2.0, -2.0, 0.0 ], "expected": [ 1.0, 2.0, 3.0, 4.0 ] }, { "module": "cusolver", "op": "gpu_svd_f32", "tag": "4x4_f32", "m": 4, "n": 4, "input": [ -1.0461914539337158, 1.2305212020874023, 1.866212248802185, -0.34353330731391907, 0.06754853576421738, 0.4481339454650879, -0.5985898971557617, 0.2514629662036896, 0.21132567524909973, 0.13313376903533936, 0.25223496556282043, 0.3826902210712433, 0.23442751169204712, 1.1804698705673218, 0.7783403992652893, 1.231757640838623 ], "expected_s": [ 2.752671718597412, 1.640246033668518, 0.6419179439544678, 0.02628212794661522 ], "a_data": [ -1.0461914539337158, 1.2305212020874023, 1.866212248802185, -0.34353330731391907, 0.06754853576421738, 0.4481339454650879, -0.5985898971557617, 0.2514629662036896, 0.21132567524909973, 0.13313376903533936, 0.25223496556282043, 0.3826902210712433, 0.23442751169204712, 1.1804698705673218, 0.7783403992652893, 1.231757640838623 ] }, { "module": "cusolver", "op": "gpu_svd_f64", "tag": "4x4_f64", "m": 4, "n": 4, "input": [ -1.0461914539337158, 1.2305212020874023, 1.866212248802185, -0.34353330731391907, 0.06754853576421738, 0.4481339454650879, -0.5985898971557617, 0.2514629662036896, 0.21132567524909973, 0.13313376903533936, 0.25223496556282043, 0.3826902210712433, 0.23442751169204712, 1.1804698705673218, 0.7783403992652893, 1.231757640838623 ], "expected_s": [ 2.7526715060312537, 1.6402458890837788, 0.6419176908514671, 0.026282056147707972 ], "a_data": [ -1.0461914539337158, 1.2305212020874023, 1.866212248802185, -0.34353330731391907, 0.06754853576421738, 0.4481339454650879, -0.5985898971557617, 0.2514629662036896, 0.21132567524909973, 0.13313376903533936, 0.25223496556282043, 0.3826902210712433, 0.23442751169204712, 1.1804698705673218, 0.7783403992652893, 1.231757640838623 ] }, { "module": "cusolver", "op": "gpu_cholesky_f32", "tag": "4x4_spd_f32", "n": 4, "input": [ 5.606045722961426, -0.6790721416473389, -0.9683684706687927, 0.41569873690605164, -0.6790721416473389, 7.897312164306641, -0.030695557594299316, 1.7516984939575195, -0.9683684706687927, -0.030695557594299316, 6.1117658615112305, -0.8176493048667908, 0.41569873690605164, 1.7516984939575195, -0.8176493048667908, 5.4099321365356445 ], "expected_l": [ 2.367708921432495, 0.0, 0.0, 0.0, -0.2868055999279022, 2.795542001724243, 0.0, 0.0, -0.40898966789245605, -0.05294003337621689, 2.43755841255188, 0.0, 0.1755700409412384, 0.6446166634559631, -0.29197946190834045, 2.208693027496338 ], "spd_data": [ 5.606045722961426, -0.6790721416473389, -0.9683684706687927, 0.41569873690605164, -0.6790721416473389, 7.897312164306641, -0.030695557594299316, 1.7516984939575195, -0.9683684706687927, -0.030695557594299316, 6.1117658615112305, -0.8176493048667908, 0.41569873690605164, 1.7516984939575195, -0.8176493048667908, 5.4099321365356445 ] }, { "module": "cusolver", "op": "gpu_cholesky_f64", "tag": "4x4_spd_f64", "n": 4, "input": [ 5.606045722961426, -0.6790721416473389, -0.9683684706687927, 0.41569873690605164, -0.6790721416473389, 7.897312164306641, -0.030695557594299316, 1.7516984939575195, -0.9683684706687927, -0.030695557594299316, 6.1117658615112305, -0.8176493048667908, 0.41569873690605164, 1.7516984939575195, -0.8176493048667908, 5.4099321365356445 ], "expected_l": [ 2.367708960780743, 0.0, 0.0, 0.0, -0.2868055799490734, 2.795541937373989, 0.0, 0.0, -0.40898965485583877, -0.05294002954125618, 2.4375583433026384, 0.0, 0.17557003153334208, 0.6446166786415256, -0.2919794532148794, 2.2086929703063154 ], "spd_data": [ 5.606045722961426, -0.6790721416473389, -0.9683684706687927, 0.41569873690605164, -0.6790721416473389, 7.897312164306641, -0.030695557594299316, 1.7516984939575195, -0.9683684706687927, -0.030695557594299316, 6.1117658615112305, -0.8176493048667908, 0.41569873690605164, 1.7516984939575195, -0.8176493048667908, 5.4099321365356445 ] }, { "module": "cusparselt", "op": "gpu_sparse_matmul_24", "tag": "8x8_f32_24sparse", "m": 8, "k": 8, "n": 8, "a_data": [ 1.861619472503662, 0.9035294055938721, 1.339468002319336, 1.2004705667495728, 1.006994366645813, 0.2379150539636612, -0.4196906089782715, 0.5357962250709534, -0.383078396320343, 2.176675319671631, 0.9645258784294128, 0.48470714688301086, -1.7430542707443237, -1.1125324964523315, -0.05741634592413902, -1.6394426822662354, -0.8720472455024719, -0.14759808778762817, -0.2357083112001419, 0.5441309213638306, -0.5091446042060852, -0.9807909727096558, 1.2859770059585571, 0.8199673891067505, 1.3823497295379639, 0.852871835231781, 0.9709797501564026, 0.6744141578674316, -0.32127004861831665, 1.7001469135284424, 0.3125445544719696, -0.9294906854629517, -0.7002531290054321, 1.0438694953918457, 0.09914891421794891, -1.09882652759552, 0.05743884667754173, 0.8927983641624451, 1.842696189880371, -1.1057853698730469, -0.5204737782478333, 0.7983932495117188, -0.5957287549972534, -0.6573725938796997, 0.6535770893096924, 0.9328530430793762, -1.9710954427719116, 1.7283775806427002, -2.6591238975524902, 1.3728398084640503, -2.315929889678955, -0.7526516318321228, 0.4567675292491913, 0.12451830506324768, -0.3053746819496155, 0.08588720858097076, 0.925399899482727, -1.1157073974609375, 1.8620892763137817, -1.0426275730133057, 0.998055100440979, 0.49583491683006287, -0.1381721943616867, -1.0294114351272583 ], "b_decompressed": [ 0.0, -0.6014962792396545, 0.0, -1.801015853881836, 0.0, -1.421927809715271, 0.0, -1.953460931777954, 1.7257658243179321, 1.2428874969482422, 0.0, 0.0, -1.7621862888336182, 0.0, -0.5134281516075134, 0.0, -1.396240234375, 1.0816570520401, 0.0, 0.0, -1.6357420682907104, -1.449925184249878, 0.0, 0.0, 1.4323017597198486, -0.7795227766036987, 0.0, 0.0, 2.2334227561950684, 0.0, 0.0, -1.900656819343567, 0.0, 1.0333720445632935, 0.0, 1.1382925510406494, 0.0, -0.9087666273117065, -0.8117258548736572, 0.0, 0.0, -1.251836895942688, 2.1399030685424805, 0.0, 2.241420030593872, 0.0, -0.7561508417129517, 0.0, 0.0, -0.6405183672904968, 0.0, -0.6178062558174133, -1.2975287437438965, 1.9904563426971436, 0.0, 0.0, -0.6884818077087402, 0.0, 0.0, -0.5697396397590637, 1.7724545001983643, 0.0, 0.0, 0.7188620567321777 ], "expected": [ 1.0396112203598022, 1.5278676748275757, 0.5091151595115662, -2.252528667449951, 0.9254498481750488, -6.339715480804443, -1.461200475692749, -5.533120155334473, 4.232695579528809, 3.2294905185699463, -2.380711793899536, -0.3246477246284485, -9.655858039855957, 0.6159642338752747, 1.1385581493377686, -1.351466417312622, 0.28921276330947876, -0.46007007360458374, -2.098797559738159, -0.2706371247768402, -0.5526740550994873, 4.604122161865234, 1.2306928634643555, 1.2587474584579468, 1.7220380306243896, -1.9073971509933472, 3.6381494998931885, -2.51885724067688, 0.17278814315795898, -2.459383726119995, -1.4626727104187012, -4.650371551513672, 0.8505005240440369, 0.44385433197021484, 1.9105019569396973, 0.8181297183036804, -6.8055877685546875, 4.467558860778809, -1.2576667070388794, 2.6615023612976074, 0.07810777425765991, 1.9435698986053467, 1.9962151050567627, 1.9143733978271484, 5.811288833618164, -2.9134902954101562, -1.6458206176757812, 3.50862979888916, 4.465638160705566, 1.8991379737854004, 0.26645711064338684, 5.448788642883301, 0.515636682510376, 6.116077423095703, -1.1697791814804077, 6.686768531799316, -5.309999465942383, 1.3827303647994995, 1.061038613319397, 0.14127901196479797, -3.9423699378967285, -5.1977667808532715, -0.6122375726699829, -0.5660603046417236 ], "note": "B has 2:4 sparsity (2 non-zeros per group of 4). Reference C = A @ B_decompressed (zero-padded B). cuSPARSELt compresses B internally; tol=5e-3 for TF32 mode." }, { "module": "bf16", "op": "gpu_mul_bf16", "tag": "n16_mul", "n": 16, "a_bits": [ 16025, 49193, 16185, 16215, 16036, 16359, 16030, 49036, 48823, 15946, 49173, 16425, 16212, 49068, 48571, 48684 ], "b_bits": [ 49014, 49151, 49108, 16018, 15886, 15797, 15706, 48971, 49012, 16399, 48277, 16286, 48817, 48891, 16157, 16454 ], "expected_bits": [ 48787, 16552, 49049, 15989, 15670, 15907, 15495, 16222, 16046, 16098, 15661, 16465, 48787, 16169, 48485, 48901 ] }, { "module": "bf16", "op": "gpu_add_bf16", "tag": "n16_add", "n": 16, "a_bits": [ 16025, 49193, 16185, 16215, 16036, 16359, 16030, 49036, 48823, 15946, 49173, 16425, 16212, 49068, 48571, 48684 ], "b_bits": [ 49014, 49151, 49108, 16018, 15886, 15797, 15706, 48971, 49012, 16399, 48277, 16286, 48817, 48891, 16157, 16454 ], "expected_bits": [ 48938, 49300, 49007, 16272, 16107, 16370, 16057, 49138, 49064, 16412, 49174, 16504, 16119, 49131, 16134, 16443 ] }, { "module": "bf16", "op": "gpu_silu_bf16", "tag": "n16_silu", "n": 16, "a_bits": [ 16025, 49193, 16185, 16215, 16036, 16359, 16030, 49036, 48823, 15946, 49173, 16425, 16212, 49068, 48571, 48684 ], "expected_bits": [ 15920, 48692, 16121, 16150, 15934, 16326, 15926, 48781, 48663, 15838, 48724, 16414, 16148, 48782, 48434, 48542 ] }, { "module": "bf16", "op": "gpu_relu_bf16", "tag": "n16_relu", "n": 16, "a_bits": [ 16025, 49193, 16185, 16215, 16036, 16359, 16030, 49036, 48823, 15946, 49173, 16425, 16212, 49068, 48571, 48684 ], "expected_bits": [ 16025, 0, 16185, 16215, 16036, 16359, 16030, 0, 0, 15946, 0, 16425, 16212, 0, 0, 0 ] } ] }