Skip to content

Commit

Permalink
Unify prefix for iscc string handling functions
Browse files Browse the repository at this point in the history
  • Loading branch information
titusz committed Feb 11, 2022
1 parent 733b55a commit d7b1d78
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 54 deletions.
16 changes: 8 additions & 8 deletions iscc_core/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def normalize(iscc_code):
########################################################################################


def decode_iscc(iscc):
def iscc_decode(iscc):
# type: (str) -> IsccTuple
"""
Decode ISCC to an IsccTuple
Expand All @@ -419,7 +419,7 @@ def decode_iscc(iscc):
return read_header(data)


def explain(iscc):
def iscc_explain(iscc):
# type: (str) -> str
"""
Convert ISCC to a human-readable representation
Expand All @@ -428,8 +428,8 @@ def explain(iscc):
:return: Human-readable representation of ISCC
:rtype: str
"""
tid = type_id(iscc)
fields = decode_iscc(iscc)
tid = iscc_type_id(iscc)
fields = iscc_decode(iscc)
if fields[0] == MT.ID:
counter_bytes = fields[-1][8:]
if counter_bytes:
Expand All @@ -440,7 +440,7 @@ def explain(iscc):
return f"{tid}-{hex_hash}"


def type_id(iscc):
def iscc_type_id(iscc):
# type: (str) -> str
"""
Extract and convert ISCC HEADER to a readable Type-ID string.
Expand All @@ -451,7 +451,7 @@ def type_id(iscc):
:return: Unique Type-ID string
:rtype: str
"""
fields = decode_iscc(iscc)
fields = iscc_decode(iscc)
mtype = MT(fields[0])
stype = SUBTYPE_MAP[fields[0]](fields[1])

Expand All @@ -466,8 +466,8 @@ def type_id(iscc):
return f"{mtype.name}-{stype.name}-{version.name}-{length}"


def validate(iscc, strict=True):
# type: (str) -> bool
def iscc_validate(iscc, strict=True):
# type: (str, bool) -> bool
"""
Validate that a given string is a *strictly well-formed* ISCC.
Expand Down
26 changes: 14 additions & 12 deletions iscc_core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
"json_canonical",
"ipfs_hash",
"sliding_window",
"similarity",
"distance",
"hamming_distance",
"iscc_similarity",
"iscc_distance",
"hamming_distance_bytes",
]


Expand Down Expand Up @@ -111,7 +111,7 @@ def sliding_window(seq, width):
return (seq[i : i + width] for i in idx)


def similarity(a, b):
def iscc_similarity(a, b):
# type: (str, str) -> int
"""
Calculate similarity of ISCC codes as a percentage value (0-100).
Expand All @@ -123,14 +123,14 @@ def similarity(a, b):
:return: Similarity of ISCC a and b in percent (based on hamming distance)
:rtype: int
"""
a, b = _safe_unpack(a, b)
hdist = hamming_distance(a, b)
a, b = iscc_pair_unpack(a, b)
hdist = hamming_distance_bytes(a, b)
nbits = len(a) * 8
sim = int(((nbits - hdist) / nbits) * 100)
return sim


def distance(a, b):
def iscc_distance(a, b):
# type: (str, str) -> int
"""
Calculate hamming distance of ISCC codes.
Expand All @@ -142,11 +142,11 @@ def distance(a, b):
:return: Hamming distance in number of bits.
:rtype: int
"""
a, b = _safe_unpack(a, b)
return hamming_distance(a, b)
a, b = iscc_pair_unpack(a, b)
return hamming_distance_bytes(a, b)


def hamming_distance(a, b):
def hamming_distance_bytes(a, b):
# type: (bytes, bytes) -> int
"""
Calculate hamming distance for binary hash digests of equal length.
Expand All @@ -164,10 +164,12 @@ def hamming_distance(a, b):
return count_xor(ba, bb)


def _safe_unpack(a, b):
def iscc_pair_unpack(a, b):
# type: (str, str) -> Tuple[bytes, bytes]
"""
Unpack two ISCC codes and return their hash digests if their headers match.
Unpack two ISCC codes and return their body hash digests if their headers match.
Headers match if their MainType, SubType, and Version are identical.
:param a: ISCC a
:param b: ISCC b
Expand Down
26 changes: 13 additions & 13 deletions tests/test_codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,56 +476,56 @@ def test_codec_Code_rnd_mt_iscc():

def test_codec_validate_regex():
valid = ic.gen_meta_code("Hello World", bits=32)["iscc"]
assert ic.validate(valid) is True
assert ic.iscc_validate(valid) is True
invalid = valid[:-1]
assert ic.validate(invalid, strict=False) is False
assert ic.iscc_validate(invalid, strict=False) is False
with pytest.raises(ValueError):
ic.validate(invalid, strict=True)
ic.iscc_validate(invalid, strict=True)


def test_codec_validate_header_prefix():
valid = ic.gen_meta_code("Hello World", bits=32)["iscc"]
invalid = "ISCC:AE" + valid[7:]
assert ic.validate(invalid, strict=False) is False
assert ic.iscc_validate(invalid, strict=False) is False
with pytest.raises(ValueError):
ic.validate(invalid)
ic.iscc_validate(invalid)


def test_decode_iscc():
assert ic.decode_iscc("AAAQCAAAAABAAAAA") == (0, 0, 0, 1, b"\x01\x00\x00\x00\x02\x00\x00\x00")
assert ic.iscc_decode("AAAQCAAAAABAAAAA") == (0, 0, 0, 1, b"\x01\x00\x00\x00\x02\x00\x00\x00")


def test_type_id_maintype_meta():
assert ic.type_id("AAAQCAAAAABAAAAA") == "META-NONE-V0-64"
assert ic.iscc_type_id("AAAQCAAAAABAAAAA") == "META-NONE-V0-64"


def test_type_id_maintype_iscc_code():
iscc = "KICQOCPJM46YUUCBMWS6FFXRGM3LJOU5MZOVPOUHIJOHPI324GKN67Q"
assert ic.type_id(iscc) == "ISCC-AUDIO-V0-MCDI"
assert ic.iscc_type_id(iscc) == "ISCC-AUDIO-V0-MCDI"


def test_type_id_maintype_iscc_id():
iscc = "MEAAO5JRN22FN2M2"
assert ic.type_id(iscc) == "ID-BITCOIN-V0-64"
assert ic.iscc_type_id(iscc) == "ID-BITCOIN-V0-64"


def test_explain_maintype_meta():
assert ic.explain("AAAQCAAAAABAAAAA") == "META-NONE-V0-64-0100000002000000"
assert ic.iscc_explain("AAAQCAAAAABAAAAA") == "META-NONE-V0-64-0100000002000000"


def test_explain_maintype_iscc_code():
iscc = "KICQOCPJM46YUUCBMWS6FFXRGM3LJOU5MZOVPOUHIJOHPI324GKN67Q"
assert (
ic.explain(iscc)
ic.iscc_explain(iscc)
== "ISCC-AUDIO-V0-MCDI-0709e9673d8a504165a5e296f13336b4ba9d665d57ba87425c77a37ae194df7e"
)


def test_explain_maintype_iscc_id_no_counter():
iscc = "MEAAO5JRN22FN2M2"
assert ic.explain(iscc) == "ID-BITCOIN-V0-64-0775316eb456e99a"
assert ic.iscc_explain(iscc) == "ID-BITCOIN-V0-64-0775316eb456e99a"


def test_explain_maintype_iscc_id_counter():
iscc = "ISCC:MAASAJINXFXA2SQXAE"
assert ic.explain(iscc) == "ID-PRIVATE-V0-72-20250db96e0d4a17-1"
assert ic.iscc_explain(iscc) == "ID-PRIVATE-V0-72-20250db96e0d4a17-1"
12 changes: 6 additions & 6 deletions tests/test_iscc_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_gen_iscc_code_full():
icode = ic.gen_iscc_code([MID_64, CID_64, DID_128, IID_256])
assert icode == {"iscc": "ISCC:KACYPXW445FTYNJ3CYSXHAFJMA2HUWULUNRFE3BLHRSCXYH2M5AEGQY"}
assert (
ic.explain(icode["iscc"])
ic.iscc_explain(icode["iscc"])
== "ISCC-TEXT-V0-MCDI-87dedce74b3c353b16257380a960347a5a8ba362526c2b3c642be0fa67404343"
)

Expand All @@ -25,7 +25,7 @@ def test_gen_iscc_code_v0_full():
icode = ic.gen_iscc_code_v0([MID_64, CID_64, DID_128, IID_256])
assert icode == {"iscc": "ISCC:KACYPXW445FTYNJ3CYSXHAFJMA2HUWULUNRFE3BLHRSCXYH2M5AEGQY"}
assert (
ic.explain(icode["iscc"])
ic.iscc_explain(icode["iscc"])
== "ISCC-TEXT-V0-MCDI-87dedce74b3c353b16257380a960347a5a8ba362526c2b3c642be0fa67404343"
)

Expand All @@ -34,7 +34,7 @@ def test_gen_iscc_code_v0_no_meta():
icode = ic.gen_iscc_code_v0([CID_64, DID_128, IID_256])
assert icode == {"iscc": "ISCC:KAARMJLTQCUWAND2LKF2GYSSNQVTYZBL4D5GOQCDIM"}
assert (
ic.explain(icode["iscc"])
ic.iscc_explain(icode["iscc"])
== "ISCC-TEXT-V0-CDI-16257380a960347a5a8ba362526c2b3c642be0fa67404343"
)

Expand All @@ -44,21 +44,21 @@ def test_gen_iscc_code_v0_no_meta_content():
assert icode == {"iscc": "ISCC:KUAFVC5DMJJGYKZ4MQV6B6THIBBUG"}
# TODO maybe show length for SubType SUM as we know the unit composition.
# we may also get an ISCC-SUM-V0-256 version
assert ic.explain(icode["iscc"]) == "ISCC-SUM-V0-DI-5a8ba362526c2b3c642be0fa67404343"
assert ic.iscc_explain(icode["iscc"]) == "ISCC-SUM-V0-DI-5a8ba362526c2b3c642be0fa67404343"


def test_gen_iscc_code_v0_no_meta_content_128():
icode = ic.gen_iscc_code_v0([DID_64, IID_256])
assert icode == {"iscc": "ISCC:KUAAQICFKJYKY4KUMQV6B6THIBBUG"}
assert ic.explain(icode["iscc"]) == "ISCC-SUM-V0-DI-0820455270ac7154642be0fa67404343"
assert ic.iscc_explain(icode["iscc"]) == "ISCC-SUM-V0-DI-0820455270ac7154642be0fa67404343"
assert ISCC(**icode).iscc == "ISCC:KUAAQICFKJYKY4KUMQV6B6THIBBUG"


def test_gen_iscc_code_v0_ordering():
icode = ic.gen_iscc_code_v0([CID_64, MID_64, IID_256, DID_128])
assert icode == {"iscc": "ISCC:KACYPXW445FTYNJ3CYSXHAFJMA2HUWULUNRFE3BLHRSCXYH2M5AEGQY"}
assert (
ic.explain(icode["iscc"])
ic.iscc_explain(icode["iscc"])
== "ISCC-TEXT-V0-MCDI-87dedce74b3c353b16257380a960347a5a8ba362526c2b3c642be0fa67404343"
)
assert ISCC(**icode).iscc == "ISCC:KACYPXW445FTYNJ3CYSXHAFJMA2HUWULUNRFE3BLHRSCXYH2M5AEGQY"
Expand Down
12 changes: 6 additions & 6 deletions tests/test_iscc_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,21 @@ def test_gen_iscc_id_v0_single_component():
tc = ic.gen_text_code_v0("Hello World")
iscc_id = ic.gen_iscc_id(0, tc["iscc"])
assert iscc_id == {"iscc": "ISCC:MAACAJINXFXA2SQX"}
assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-20250db96e0d4a17"
assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-20250db96e0d4a17"


def test_gen_iscc_id_v0_single_component_uc():
tc = ic.gen_text_code_v0("Hello World")
iscc_id = ic.gen_iscc_id(0, tc["iscc"], uc=1)
assert iscc_id == {"iscc": "ISCC:MAASAJINXFXA2SQXAE"}
assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-72-20250db96e0d4a17-1"
assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-72-20250db96e0d4a17-1"


def test_gen_iscc_id_v0_single_component_uc_2byte():
tc = ic.gen_text_code_v0("Hello World")
iscc_id = ic.gen_iscc_id(0, tc["iscc"], uc=257)
assert iscc_id["iscc"] == "ISCC:MABCAJINXFXA2SQXQEBA"
assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-80-20250db96e0d4a17-257"
assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-80-20250db96e0d4a17-257"


def test_gen_iscc_id_v0_multiple_components():
Expand All @@ -33,7 +33,7 @@ def test_gen_iscc_id_v0_multiple_components():
code = ic.iscc_clean(mc) + ic.iscc_clean(tc)
iscc_id = ic.gen_iscc_id(1, code)
assert iscc_id["iscc"] == "ISCC:MEACANI57VXZ67R7"
assert ic.explain(iscc_id["iscc"]) == "ID-BITCOIN-V0-64-20351dfd6f9f7e3f"
assert ic.iscc_explain(iscc_id["iscc"]) == "ID-BITCOIN-V0-64-20351dfd6f9f7e3f"


def test_gen_iscc_id_v0_full_code():
Expand All @@ -46,7 +46,7 @@ def test_gen_iscc_id_v0_instance_only():
icode = ic.gen_instance_code_v0(io.BytesIO(b"hello world"))
iscc_id = ic.gen_iscc_id(0, icode["iscc"])
assert iscc_id["iscc"] == "ISCC:MAAEBV2JQHX2OCQM"
assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-40d74981efa70a0c"
assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-40d74981efa70a0c"


def test_gen_iscc_id_v0_data_instance():
Expand All @@ -55,7 +55,7 @@ def test_gen_iscc_id_v0_data_instance():
iscc_sum = ic.gen_iscc_code_v0([icode["iscc"], dc["iscc"]])
iscc_id = ic.gen_iscc_id(0, iscc_sum["iscc"])
assert iscc_id["iscc"] == "ISCC:MAADB7WD7TC5XELQ"
assert ic.explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-30fec3fcc5db9170"
assert ic.iscc_explain(iscc_id["iscc"]) == "ID-PRIVATE-V0-64-30fec3fcc5db9170"


def test_incr_iscc_id():
Expand Down
18 changes: 9 additions & 9 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,43 +13,43 @@


def test_hamming_distance():
assert ic.hamming_distance(A_BYT, B_BYT) == 4
assert ic.hamming_distance_bytes(A_BYT, B_BYT) == 4


def test_similarity_single_64():
ia = ic.Code.rnd(mt=ic.MT.CONTENT, st=ic.ST_ISCC.IMAGE, bits=64, data=A_BYT).code
ib = ic.Code.rnd(mt=ic.MT.CONTENT, st=ic.ST_ISCC.IMAGE, bits=64, data=B_BYT).code
assert ic.similarity(ia, ib) == 93
assert ic.iscc_similarity(ia, ib) == 93


def test_similarity_single_256():
a = "AAD7SATLZUS57KXZZL2HXAD7HT6264AHEIRZQ4QTLB6LHVRXNTLE7MA"
b = "AAD7CATK5QX46LX5YL2HXIH7FT626UAHE4RYC4QTDB6LXVRXNDJE7MA"
assert ic.similarity(a, b) == 90
assert ic.iscc_similarity(a, b) == 90


def test_similarity_composite():
a = "KQD7SATLZUS57KXZN2N6SA6A3THBJRQW4B5CZPGWU2PR566ZQNLM2AA"
b = "KQD7CATK5QX46LX5N2N6SA6A3THBJRQW4B5CZPGWU2PR566ZQNLM2AA"
assert ic.similarity(a, b) == 96
assert ic.iscc_similarity(a, b) == 96


def test_distance_single_64():
ia = ic.Code.rnd(mt=ic.MT.CONTENT, st=ic.ST_ISCC.IMAGE, bits=64, data=A_BYT).code
ib = ic.Code.rnd(mt=ic.MT.CONTENT, st=ic.ST_ISCC.IMAGE, bits=64, data=B_BYT).code
assert ic.distance(ia, ib) == 4
assert ic.iscc_distance(ia, ib) == 4


def test_distance_single_256():
a = "AAD7SATLZUS57KXZZL2HXAD7HT6264AHEIRZQ4QTLB6LHVRXNTLE7MA"
b = "AAD7CATK5QX46LX5YL2HXIH7FT626UAHE4RYC4QTDB6LXVRXNDJE7MA"
assert ic.distance(a, b) == 24
assert ic.iscc_distance(a, b) == 24


def test_distance_composite():
a = "KQD7SATLZUS57KXZN2N6SA6A3THBJRQW4B5CZPGWU2PR566ZQNLM2AA"
b = "KQD7CATK5QX46LX5N2N6SA6A3THBJRQW4B5CZPGWU2PR566ZQNLM2AA"
assert ic.distance(a, b) == 10
assert ic.iscc_distance(a, b) == 10


def test_sliding_window():
Expand Down Expand Up @@ -93,7 +93,7 @@ def test__safe_unpack():
a = ic.Code.rnd(ic.MT.META, bits=64).code
b = ic.Code.rnd(ic.MT.DATA, bits=64).code
with pytest.raises(ValueError):
ic.utils._safe_unpack(a, b)
ic.utils.iscc_pair_unpack(a, b)


def test_ipfs_hash(static_bytes):
Expand Down Expand Up @@ -121,4 +121,4 @@ def test_sliding_window_raises():
def test_hamming_distance_raises():
a, b = os.urandom(8), os.urandom(9)
with pytest.raises(AssertionError):
ic.hamming_distance(a, b)
ic.hamming_distance_bytes(a, b)

0 comments on commit d7b1d78

Please sign in to comment.