diff --git a/iscc_core/codec.py b/iscc_core/codec.py index c504dfc..6699582 100644 --- a/iscc_core/codec.py +++ b/iscc_core/codec.py @@ -405,7 +405,7 @@ def normalize(iscc_code): ######################################################################################## -def decode_iscc(iscc): +def iscc_decode(iscc): # type (str) -> IsccTuple """ Decode ISCC to an IsccTuple @@ -419,7 +419,7 @@ def decode_iscc(iscc): return read_header(data) -def explain(iscc): +def iscc_explain(iscc): # type (str) -> str: """ Convert ISCC to a human-readable representation @@ -428,8 +428,8 @@ def explain(iscc): :return: Human-readable representation of ISCC :rtype: str """ - tid = type_id(iscc) - fields = decode_iscc(iscc) + tid = iscc_type_id(iscc) + fields = iscc_decode(iscc) if fields[0] == MT.ID: counter_bytes = fields[-1][8:] if counter_bytes: @@ -440,7 +440,7 @@ def explain(iscc): return f"{tid}-{hex_hash}" -def type_id(iscc): +def iscc_type_id(iscc): # type (str) - str: """ Extract and convert ISCC HEADER to a readable Type-ID string. @@ -451,7 +451,7 @@ def type_id(iscc): :return: Unique Type-ID string :rtype: str """ - fields = decode_iscc(iscc) + fields = iscc_decode(iscc) mtype = MT(fields[0]) stype = SUBTYPE_MAP[fields[0]](fields[1]) @@ -466,8 +466,8 @@ def type_id(iscc): return f"{mtype.name}-{stype.name}-{version.name}-{length}" -def validate(iscc, strict=True): - # type: (str) -> bool +def iscc_validate(iscc, strict=True): + # type: (str, bool) -> bool """ Validate that a given string is a *strictly well-formed* ISCC. diff --git a/iscc_core/utils.py b/iscc_core/utils.py index 15d9c17..4e174ee 100644 --- a/iscc_core/utils.py +++ b/iscc_core/utils.py @@ -13,9 +13,9 @@ "json_canonical", "ipfs_hash", "sliding_window", - "similarity", - "distance", - "hamming_distance", + "iscc_similarity", + "iscc_distance", + "hamming_distance_bytes", ] @@ -111,7 +111,7 @@ def sliding_window(seq, width): return (seq[i : i + width] for i in idx) -def similarity(a, b): +def iscc_similarity(a, b): # type: (str, str) -> int """ Calculate similarity of ISCC codes as a percentage value (0-100). @@ -123,14 +123,14 @@ def similarity(a, b): :return: Similarity of ISCC a and b in percent (based on hamming distance) :rtype: int """ - a, b = _safe_unpack(a, b) - hdist = hamming_distance(a, b) + a, b = iscc_pair_unpack(a, b) + hdist = hamming_distance_bytes(a, b) nbits = len(a) * 8 sim = int(((nbits - hdist) / nbits) * 100) return sim -def distance(a, b): +def iscc_distance(a, b): # type: (str, str) -> int """ Calculate hamming distance of ISCC codes. @@ -142,11 +142,11 @@ def distance(a, b): :return: Hamming distanced in number of bits. :rtype: int """ - a, b = _safe_unpack(a, b) - return hamming_distance(a, b) + a, b = iscc_pair_unpack(a, b) + return hamming_distance_bytes(a, b) -def hamming_distance(a, b): +def hamming_distance_bytes(a, b): # type: (bytes, bytes) -> int """ Calculate hamming distance for binary hash digests of equal length. @@ -164,10 +164,12 @@ def hamming_distance(a, b): return count_xor(ba, bb) -def _safe_unpack(a, b): +def iscc_pair_unpack(a, b): # type: (str, str) -> Tuple[bytes, bytes] """ - Unpack two ISCC codes and return their hash digests if their headers match. + Unpack two ISCC codes and return their body hash digests if their headers match. + + Headers match if their MainType, SubType, and Version are identical. :param a: ISCC a :param b: ISCC b diff --git a/tests/test_codec.py b/tests/test_codec.py index 6824989..a51bc95 100644 --- a/tests/test_codec.py +++ b/tests/test_codec.py @@ -476,56 +476,56 @@ def test_codec_Code_rnd_mt_iscc(): def test_codec_validate_regex(): valid = ic.gen_meta_code("Hello World", bits=32)["iscc"] - assert ic.validate(valid) is True + assert ic.iscc_validate(valid) is True invalid = valid[:-1] - assert ic.validate(invalid, strict=False) is False + assert ic.iscc_validate(invalid, strict=False) is False with pytest.raises(ValueError): - ic.validate(invalid, strict=True) + ic.iscc_validate(invalid, strict=True) def test_codec_validate_header_prefix(): valid = ic.gen_meta_code("Hello World", bits=32)["iscc"] invalid = "ISCC:AE" + valid[7:] - assert ic.validate(invalid, strict=False) is False + assert ic.iscc_validate(invalid, strict=False) is False with pytest.raises(ValueError): - ic.validate(invalid) + ic.iscc_validate(invalid) def test_decode_iscc(): - assert ic.decode_iscc("AAAQCAAAAABAAAAA") == (0, 0, 0, 1, b"\x01\x00\x00\x00\x02\x00\x00\x00") + assert ic.iscc_decode("AAAQCAAAAABAAAAA") == (0, 0, 0, 1, b"\x01\x00\x00\x00\x02\x00\x00\x00") def test_type_id_maintype_meta(): - assert ic.type_id("AAAQCAAAAABAAAAA") == "META-NONE-V0-64" + assert ic.iscc_type_id("AAAQCAAAAABAAAAA") == "META-NONE-V0-64" def test_type_id_maintype_iscc_code(): iscc = "KICQOCPJM46YUUCBMWS6FFXRGM3LJOU5MZOVPOUHIJOHPI324GKN67Q" - assert ic.type_id(iscc) == "ISCC-AUDIO-V0-MCDI" + assert ic.iscc_type_id(iscc) == "ISCC-AUDIO-V0-MCDI" def test_type_id_maintype_iscc_id(): iscc = "MEAAO5JRN22FN2M2" - assert ic.type_id(iscc) == "ID-BITCOIN-V0-64" + assert ic.iscc_type_id(iscc) == "ID-BITCOIN-V0-64" def test_explain_maintype_meta(): - assert ic.explain("AAAQCAAAAABAAAAA") == "META-NONE-V0-64-0100000002000000" + assert ic.iscc_explain("AAAQCAAAAABAAAAA") == "META-NONE-V0-64-0100000002000000" def test_explain_maintype_iscc_code(): iscc = "KICQOCPJM46YUUCBMWS6FFXRGM3LJOU5MZOVPOUHIJOHPI324GKN67Q" assert ( - ic.explain(iscc) + ic.iscc_explain(iscc) == "ISCC-AUDIO-V0-MCDI-0709e9673d8a504165a5e296f13336b4ba9d665d57ba87425c77a37ae194df7e" ) def test_explain_maintype_iscc_id_no_counter(): iscc = "MEAAO5JRN22FN2M2" - assert ic.explain(iscc) == "ID-BITCOIN-V0-64-0775316eb456e99a" + assert ic.iscc_explain(iscc) == "ID-BITCOIN-V0-64-0775316eb456e99a" def test_explain_maintype_iscc_id_counter(): iscc = "ISCC:MAASAJINXFXA2SQXAE" - assert ic.explain(iscc) == "ID-PRIVATE-V0-72-20250db96e0d4a17-1" + assert ic.iscc_explain(iscc) == "ID-PRIVATE-V0-72-20250db96e0d4a17-1" diff --git a/tests/test_iscc_code.py b/tests/test_iscc_code.py index a456186..1888985 100644 --- a/tests/test_iscc_code.py +++ b/tests/test_iscc_code.py @@ -16,7 +16,7 @@ def test_gen_iscc_code_full(): icode = ic.gen_iscc_code([MID_64, CID_64, DID_128, IID_256]) assert icode == {"iscc": "ISCC:KACYPXW445FTYNJ3CYSXHAFJMA2HUWULUNRFE3BLHRSCXYH2M5AEGQY"} assert ( - ic.explain(icode["iscc"]) + ic.iscc_explain(icode["iscc"]) == "ISCC-TEXT-V0-MCDI-87dedce74b3c353b16257380a960347a5a8ba362526c2b3c642be0fa67404343" ) @@ -25,7 +25,7 @@ def test_gen_iscc_code_v0_full(): icode = ic.gen_iscc_code_v0([MID_64, CID_64, DID_128, IID_256]) assert icode == {"iscc": "ISCC:KACYPXW445FTYNJ3CYSXHAFJMA2HUWULUNRFE3BLHRSCXYH2M5AEGQY"} assert ( - ic.explain(icode["iscc"]) + ic.iscc_explain(icode["iscc"]) == "ISCC-TEXT-V0-MCDI-87dedce74b3c353b16257380a960347a5a8ba362526c2b3c642be0fa67404343" ) @@ -34,7 +34,7 @@ def test_gen_iscc_code_v0_no_meta(): icode = ic.gen_iscc_code_v0([CID_64, DID_128, IID_256]) assert icode == {"iscc": "ISCC:KAARMJLTQCUWAND2LKF2GYSSNQVTYZBL4D5GOQCDIM"} assert ( - ic.explain(icode["iscc"]) + ic.iscc_explain(icode["iscc"]) == "ISCC-TEXT-V0-CDI-16257380a960347a5a8ba362526c2b3c642be0fa67404343" ) @@ -44,13 +44,13 @@ def test_gen_iscc_code_v0_no_meta_content(): assert icode == {"iscc": "ISCC:KUAFVC5DMJJGYKZ4MQV6B6THIBBUG"} # TODO mabye show length for SubType SUM as we now the unit composition. # we may also get a ISCC-SUM-V0-256 version - assert ic.explain(icode["iscc"]) == "ISCC-SUM-V0-DI-5a8ba362526c2b3c642be0fa67404343" + assert ic.iscc_explain(icode["iscc"]) == "ISCC-SUM-V0-DI-5a8ba362526c2b3c642be0fa67404343" def test_gen_iscc_code_v0_no_meta_content_128(): icode = ic.gen_iscc_code_v0([DID_64, IID_256]) assert icode == {"iscc": "ISCC:KUAAQICFKJYKY4KUMQV6B6THIBBUG"} - assert ic.explain(icode["iscc"]) == "ISCC-SUM-V0-DI-0820455270ac7154642be0fa67404343" + assert ic.iscc_explain(icode["iscc"]) == "ISCC-SUM-V0-DI-0820455270ac7154642be0fa67404343" assert ISCC(**icode).iscc == "ISCC:KUAAQICFKJYKY4KUMQV6B6THIBBUG" @@ -58,7 +58,7 @@ def test_gen_iscc_code_v0_ordering(): icode = ic.gen_iscc_code_v0([CID_64, MID_64, IID_256, DID_128]) assert icode == {"iscc": "ISCC:KACYPXW445FTYNJ3CYSXHAFJMA2HUWULUNRFE3BLHRSCXYH2M5AEGQY"} assert ( - ic.explain(icode["iscc"]) + ic.iscc_explain(icode["iscc"]) == "ISCC-TEXT-V0-MCDI-87dedce74b3c353b16257380a960347a5a8ba362526c2b3c642be0fa67404343" ) assert ISCC(**icode).iscc == "ISCC:KACYPXW445FTYNJ3CYSXHAFJMA2HUWULUNRFE3BLHRSCXYH2M5AEGQY" diff --git a/tests/test_iscc_id.py b/tests/test_iscc_id.py index 9018db1..3541268 100644 --- a/tests/test_iscc_id.py +++ b/tests/test_iscc_id.py @@ -10,21 +10,21 @@ def test_gen_iscc_id_v0_single_component(): tc = ic.gen_text_code_v0("Hello World") iscc_id = ic.gen_iscc_id(0, tc["iscc"]) assert iscc_id == {"iscc": "ISCC:MAACAJINXFXA2SQX"} - assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-20250db96e0d4a17" + assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-20250db96e0d4a17" def test_gen_iscc_id_v0_single_component_uc(): tc = ic.gen_text_code_v0("Hello World") iscc_id = ic.gen_iscc_id(0, tc["iscc"], uc=1) assert iscc_id == {"iscc": "ISCC:MAASAJINXFXA2SQXAE"} - assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-72-20250db96e0d4a17-1" + assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-72-20250db96e0d4a17-1" def test_gen_iscc_id_v0_single_component_uc_2byte(): tc = ic.gen_text_code_v0("Hello World") iscc_id = ic.gen_iscc_id(0, tc["iscc"], uc=257) assert iscc_id["iscc"] == "ISCC:MABCAJINXFXA2SQXQEBA" - assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-80-20250db96e0d4a17-257" + assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-80-20250db96e0d4a17-257" def test_gen_iscc_id_v0_multiple_components(): @@ -33,7 +33,7 @@ def test_gen_iscc_id_v0_multiple_components(): code = ic.iscc_clean(mc) + ic.iscc_clean(tc) iscc_id = ic.gen_iscc_id(1, code) assert iscc_id["iscc"] == "ISCC:MEACANI57VXZ67R7" - assert ic.explain(iscc_id["iscc"]) == "ID-BITCOIN-V0-64-20351dfd6f9f7e3f" + assert ic.iscc_explain(iscc_id["iscc"]) == "ID-BITCOIN-V0-64-20351dfd6f9f7e3f" def test_gen_iscc_id_v0_full_code(): @@ -46,7 +46,7 @@ def test_gen_iscc_id_v0_instance_only(): icode = ic.gen_instance_code_v0(io.BytesIO(b"hello world")) iscc_id = ic.gen_iscc_id(0, icode["iscc"]) assert iscc_id["iscc"] == "ISCC:MAAEBV2JQHX2OCQM" - assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-40d74981efa70a0c" + assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-40d74981efa70a0c" def test_gen_iscc_id_v0_data_instance(): @@ -55,7 +55,7 @@ def test_gen_iscc_id_v0_data_instance(): iscc_sum = ic.gen_iscc_code_v0([icode["iscc"], dc["iscc"]]) iscc_id = ic.gen_iscc_id(0, iscc_sum["iscc"]) assert iscc_id["iscc"] == "ISCC:MAADB7WD7TC5XELQ" - assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-30fec3fcc5db9170" + assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-30fec3fcc5db9170" def test_incr_iscc_id(): diff --git a/tests/test_utils.py b/tests/test_utils.py index 20c99c5..2b6a6c8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -13,43 +13,43 @@ def test_hamming_distance(): - assert ic.hamming_distance(A_BYT, B_BYT) == 4 + assert ic.hamming_distance_bytes(A_BYT, B_BYT) == 4 def test_similarity_single_64(): ia = ic.Code.rnd(mt=ic.MT.CONTENT, st=ic.ST_ISCC.IMAGE, bits=64, data=A_BYT).code ib = ic.Code.rnd(mt=ic.MT.CONTENT, st=ic.ST_ISCC.IMAGE, bits=64, data=B_BYT).code - assert ic.similarity(ia, ib) == 93 + assert ic.iscc_similarity(ia, ib) == 93 def test_similarity_single_256(): a = "AAD7SATLZUS57KXZZL2HXAD7HT6264AHEIRZQ4QTLB6LHVRXNTLE7MA" b = "AAD7CATK5QX46LX5YL2HXIH7FT626UAHE4RYC4QTDB6LXVRXNDJE7MA" - assert ic.similarity(a, b) == 90 + assert ic.iscc_similarity(a, b) == 90 def test_similarity_composite(): a = "KQD7SATLZUS57KXZN2N6SA6A3THBJRQW4B5CZPGWU2PR566ZQNLM2AA" b = "KQD7CATK5QX46LX5N2N6SA6A3THBJRQW4B5CZPGWU2PR566ZQNLM2AA" - assert ic.similarity(a, b) == 96 + assert ic.iscc_similarity(a, b) == 96 def test_distance_single_64(): ia = ic.Code.rnd(mt=ic.MT.CONTENT, st=ic.ST_ISCC.IMAGE, bits=64, data=A_BYT).code ib = ic.Code.rnd(mt=ic.MT.CONTENT, st=ic.ST_ISCC.IMAGE, bits=64, data=B_BYT).code - assert ic.distance(ia, ib) == 4 + assert ic.iscc_distance(ia, ib) == 4 def test_distance_single_256(): a = "AAD7SATLZUS57KXZZL2HXAD7HT6264AHEIRZQ4QTLB6LHVRXNTLE7MA" b = "AAD7CATK5QX46LX5YL2HXIH7FT626UAHE4RYC4QTDB6LXVRXNDJE7MA" - assert ic.distance(a, b) == 24 + assert ic.iscc_distance(a, b) == 24 def test_distance_composite(): a = "KQD7SATLZUS57KXZN2N6SA6A3THBJRQW4B5CZPGWU2PR566ZQNLM2AA" b = "KQD7CATK5QX46LX5N2N6SA6A3THBJRQW4B5CZPGWU2PR566ZQNLM2AA" - assert ic.distance(a, b) == 10 + assert ic.iscc_distance(a, b) == 10 def test_sliding_window(): @@ -93,7 +93,7 @@ def test__safe_unpack(): a = ic.Code.rnd(ic.MT.META, bits=64).code b = ic.Code.rnd(ic.MT.DATA, bits=64).code with pytest.raises(ValueError): - ic.utils._safe_unpack(a, b) + ic.utils.iscc_pair_unpack(a, b) def test_ipfs_hash(static_bytes): @@ -121,4 +121,4 @@ def test_sliding_window_raises(): def test_hamming_distance_raises(): a, b = os.urandom(8), os.urandom(9) with pytest.raises(AssertionError): - ic.hamming_distance(a, b) + ic.hamming_distance_bytes(a, b)