Skip to content

Commit

Permalink
Add iscc prefix decompose and normalize functions
Browse files Browse the repository at this point in the history
  • Loading branch information
titusz committed Feb 11, 2022
1 parent c3f11b3 commit 94f70a8
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 43 deletions.
10 changes: 5 additions & 5 deletions iscc_core/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ def decode_base64(code: str) -> bytes:
return urlsafe_b64decode(string)


def decompose(iscc_code):
def iscc_decompose(iscc_code):
# type: (str) -> List[str]
"""
Decompose an ISCC-CODE or any valid ISCC sequence into a list of ISCC-UNITS.
Expand Down Expand Up @@ -345,7 +345,7 @@ def decompose(iscc_code):
return components


def normalize(iscc_code):
def iscc_normalize(iscc_code):
# type: (str) -> str
"""
Normalize an ISCC to its canonical URI form.
Expand All @@ -369,7 +369,7 @@ def normalize(iscc_code):
!!! example
``` py
>>> import iscc_core
>>> iscc_core.normalize("GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W")
>>> iscc_core.iscc_normalize("GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W")
ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM
```
Expand All @@ -395,7 +395,7 @@ def normalize(iscc_code):
raise ValueError(f"Malformed multiformat codec: {decoded[:2]}")
iscc_code = encode_base32(decoded[2:])

decomposed = decompose(iscc_code)
decomposed = iscc_decompose(iscc_code)
recomposed = gen_iscc_code_v0(decomposed)["iscc"] if len(decomposed) >= 2 else decomposed[0]
return f"ISCC:{recomposed}" if not recomposed.startswith("ISCC:") else recomposed

Expand All @@ -414,7 +414,7 @@ def iscc_decode(iscc):
:return: ISCC decoded to an IsccTuple
:rtype: IsccTuple
"""
iscc = iscc_clean(normalize(iscc))
iscc = iscc_clean(iscc_normalize(iscc))
data = decode_base32(iscc)
return read_header(data)

Expand Down
2 changes: 1 addition & 1 deletion iscc_core/iscc_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def soft_hash_iscc_id_v0(iscc_code, uc=0):
:return: Digest for ISCC-ID without header but including uniqueness counter.
:rtype: bytes
"""
components = ic.decompose(iscc_code)
components = ic.iscc_decompose(iscc_code)
decoded = [ic.decode_base32(c) for c in components]
unpacked = [ic.read_header(d) for d in decoded]

Expand Down
2 changes: 1 addition & 1 deletion iscc_core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def iscc_pair_unpack(a, b):
:rtype: Tuple[bytes, bytes]
:raise ValueError: If ISCC headers don't match
"""
a, b = ic.iscc_clean(ic.normalize(a)), ic.iscc_clean(ic.normalize(b))
a, b = ic.iscc_clean(ic.iscc_normalize(a)), ic.iscc_clean(ic.iscc_normalize(b))
a, b = ic.decode_base32(a), ic.decode_base32(b)
a, b = ic.read_header(a), ic.read_header(b)
if not a[:-1] == b[:-1]:
Expand Down
72 changes: 36 additions & 36 deletions tests/test_codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,28 +185,28 @@ def test_Code_mf_base16():
mco = ic.Code(ic.gen_meta_code("Hello base16")["iscc"])
assert mco.code == "AAATKVHH3C7FOAAZ"
assert mco.mf_base16 == "fcc0100013554e7d8be570019"
assert ic.normalize("fcc0100013554e7d8be570019") == "ISCC:AAATKVHH3C7FOAAZ"
assert ic.iscc_normalize("fcc0100013554e7d8be570019") == "ISCC:AAATKVHH3C7FOAAZ"


def test_Code_mf_base32():
mco = ic.Code(ic.gen_meta_code("Hello base32")["iscc"])
assert mco.code == "AAAQKV7H7K7VMAEL"
assert mco.mf_base32 == "bzqaqaaifk7t7vp2wacfq"
assert ic.normalize("bzqaqaaifk7t7vp2wacfq") == "ISCC:AAAQKV7H7K7VMAEL"
assert ic.iscc_normalize("bzqaqaaifk7t7vp2wacfq") == "ISCC:AAAQKV7H7K7VMAEL"


def test_Code_mf_base58btc():
mco = ic.Code(ic.gen_meta_code("Hello base58btc")["iscc"])
assert mco.code == "AAA2O57HTG7HMCO3"
assert mco.mf_base58btc == "z4rHVQUrFdpfYWuGLa"
assert ic.normalize("z4rHVQUrFdpfYWuGLa") == "ISCC:AAA2O57HTG7HMCO3"
assert ic.iscc_normalize("z4rHVQUrFdpfYWuGLa") == "ISCC:AAA2O57HTG7HMCO3"


def test_Code_mf_base64url():
mco = ic.Code(ic.gen_meta_code("This is a base64url encoded Meta-Code")["iscc"])
assert mco.code == "AAAYSN37BCO2L3O7"
assert mco.mf_base64url == "uzAEAAYk3fwidpe3f"
assert ic.normalize("uzAEAAYk3fwidpe3f") == "ISCC:AAAYSN37BCO2L3O7"
assert ic.iscc_normalize("uzAEAAYk3fwidpe3f") == "ISCC:AAAYSN37BCO2L3O7"


def test_Code_raises():
Expand Down Expand Up @@ -236,15 +236,15 @@ def test_decompose_single_component():
)
for mt in mts:
code = ic.Code.rnd(mt=mt)
assert ic.decompose(code.code)[0] == code.code
assert ic.iscc_decompose(code.code)[0] == code.code


def test_decompose_data_instance():
data = "GABTMCHNLCHTI2NHZFXOLEB53KSPU"
inst = "IAB3GN6WUSNSX3MJBT6PBTVFAQZ7G"
code = ic.gen_iscc_code_v0([data, inst])["iscc"]
assert code == "ISCC:KUADMCHNLCHTI2NHWM35NJE3FPWYS"
assert ic.decompose(code) == ["GAATMCHNLCHTI2NH", "IAA3GN6WUSNSX3MJ"]
assert ic.iscc_decompose(code) == ["GAATMCHNLCHTI2NH", "IAA3GN6WUSNSX3MJ"]


def test_decompose_content_data_instance():
Expand All @@ -254,7 +254,7 @@ def test_decompose_content_data_instance():
di = [cont, data, inst]
code = ic.gen_iscc_code_v0([cont, data, inst])["iscc"]
assert code == "ISCC:KMARIURG4CVZ3M7N6JD7UWYB4Q4Q47MLIGQWX256CU"
assert ic.decompose(code) == di
assert ic.iscc_decompose(code) == di


def test_decompose_meta_content_data_instance():
Expand All @@ -265,7 +265,7 @@ def test_decompose_meta_content_data_instance():
di = [meta, cont, data, inst]
code = ic.gen_iscc_code_v0([meta, cont, data, inst])["iscc"]
assert code == "ISCC:KEC4CPEJKZZ7A4HZMZUFTZYEOHCPLIXPTTCOIKFIEPBNTGGQDLYGFSI"
assert ic.decompose(code) == di
assert ic.iscc_decompose(code) == di


def test_decompose_invalid():
Expand All @@ -280,7 +280,7 @@ def test_decompose_str_of_codes():
dco = ic.Code.rnd(ic.MT.DATA)
ico = ic.Code.rnd(ic.MT.INSTANCE)
iscc = f"ISCC:{mco.code}-{cco.code}-{dco.code}-{ico.code}"
codes = ic.codec.decompose(iscc)
codes = ic.codec.iscc_decompose(iscc)
assert codes == [mco.code, cco.code, dco.code, ico.code]


Expand All @@ -294,32 +294,32 @@ def test_Code_rnd():


def test_normalize_single_canonical():
n = ic.normalize("ISCC:AAATTZCKVH3S42TP")
n = ic.iscc_normalize("ISCC:AAATTZCKVH3S42TP")
assert n == "ISCC:AAATTZCKVH3S42TP"


def test_normalize_single_no_scheme():
n = ic.normalize("AAATTZCKVH3S42TP")
n = ic.iscc_normalize("AAATTZCKVH3S42TP")
assert n == "ISCC:AAATTZCKVH3S42TP"


def test_normalize_single_lower():
n = ic.normalize("aaattzckvh3s42tp")
n = ic.iscc_normalize("aaattzckvh3s42tp")
assert n == "ISCC:AAATTZCKVH3S42TP"


def test_normalize_single_mixed_case():
n = ic.normalize("AaAtTzckVh3s42tP")
n = ic.iscc_normalize("AaAtTzckVh3s42tP")
assert n == "ISCC:AAATTZCKVH3S42TP"


def test_normalize_dual():
n = ic.normalize("GAAW2PRCRS5LNVZVIAAUVACQKXE3V44W")
n = ic.iscc_normalize("GAAW2PRCRS5LNVZVIAAUVACQKXE3V44W")
assert n == "ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM"


def test_normalize_dual_dash():
n = ic.normalize("GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W")
n = ic.iscc_normalize("GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W")
assert n == "ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM"


Expand All @@ -328,100 +328,100 @@ def test_clean_dual_dash():


def test_normalize_dual_scheme():
n = ic.normalize("ISCC:GAAW2PRCRS5LNVZVIAAUVACQKXE3V44W")
n = ic.iscc_normalize("ISCC:GAAW2PRCRS5LNVZVIAAUVACQKXE3V44W")
assert n == "ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM"


def test_normalize_dual_scheme_dash():
n = ic.normalize("ISCC:GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W")
n = ic.iscc_normalize("ISCC:GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W")
assert n == "ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM"


def test_normalize_triple():
n = ic.normalize("EMAZQGH26X5XQ5HAGAA5U77EOAU2NU4YIAAU4SKRQCZZEYQD")
n = ic.iscc_normalize("EMAZQGH26X5XQ5HAGAA5U77EOAU2NU4YIAAU4SKRQCZZEYQD")
assert n == "ISCC:KMAZQGH26X5XQ5HA3J76I4BJU3JZQTSJKGALHETCAM"


def test_normalize_triple_dash():
n = ic.normalize("EMAZQGH26X5XQ5HA-GAA5U77EOAU2NU4Y-IAAU4SKRQCZZEYQD")
n = ic.iscc_normalize("EMAZQGH26X5XQ5HA-GAA5U77EOAU2NU4Y-IAAU4SKRQCZZEYQD")
assert n == "ISCC:KMAZQGH26X5XQ5HA3J76I4BJU3JZQTSJKGALHETCAM"


def test_normalize_triple_scheme():
n = ic.normalize("ISCC:EMAZQGH26X5XQ5HAGAA5U77EOAU2NU4YIAAU4SKRQCZZEYQD")
n = ic.iscc_normalize("ISCC:EMAZQGH26X5XQ5HAGAA5U77EOAU2NU4YIAAU4SKRQCZZEYQD")
assert n == "ISCC:KMAZQGH26X5XQ5HA3J76I4BJU3JZQTSJKGALHETCAM"


def test_normalize_triple_scheme_dash():
n = ic.normalize("ISCC:EMAZQGH26X5XQ5HA-GAA5U77EOAU2NU4Y-IAAU4SKRQCZZEYQD")
n = ic.iscc_normalize("ISCC:EMAZQGH26X5XQ5HA-GAA5U77EOAU2NU4Y-IAAU4SKRQCZZEYQD")
assert n == "ISCC:KMAZQGH26X5XQ5HA3J76I4BJU3JZQTSJKGALHETCAM"


def test_normalize_full_scheme():
n = ic.normalize("ISCC:KACQMIRUW6L64O2CAZCVE2HHXGBO7EFCF3C4GRFCDPD2NM53NKUCXUY")
n = ic.iscc_normalize("ISCC:KACQMIRUW6L64O2CAZCVE2HHXGBO7EFCF3C4GRFCDPD2NM53NKUCXUY")
assert n == "ISCC:KACQMIRUW6L64O2CAZCVE2HHXGBO7EFCF3C4GRFCDPD2NM53NKUCXUY"


def test_normalize_full_scheme_lower():
n = ic.normalize("iscc:kacqmiruw6l64o2cazcve2hhxgbo7efcf3c4grfcdpd2nm53nkucxuy")
n = ic.iscc_normalize("iscc:kacqmiruw6l64o2cazcve2hhxgbo7efcf3c4grfcdpd2nm53nkucxuy")
assert n == "ISCC:KACQMIRUW6L64O2CAZCVE2HHXGBO7EFCF3C4GRFCDPD2NM53NKUCXUY"


def test_normalize_full_no_scheme():
n = ic.normalize("KACQMIRUW6L64O2CAZCVE2HHXGBO7EFCF3C4GRFCDPD2NM53NKUCXUY")
n = ic.iscc_normalize("KACQMIRUW6L64O2CAZCVE2HHXGBO7EFCF3C4GRFCDPD2NM53NKUCXUY")
assert n == "ISCC:KACQMIRUW6L64O2CAZCVE2HHXGBO7EFCF3C4GRFCDPD2NM53NKUCXUY"


def test_normalize_full_lower_no_scheme():
n = ic.normalize("kacqmiruw6l64o2cazcve2hhxgbo7efcf3c4grfcdpd2nm53nkucxuy")
n = ic.iscc_normalize("kacqmiruw6l64o2cazcve2hhxgbo7efcf3c4grfcdpd2nm53nkucxuy")
assert n == "ISCC:KACQMIRUW6L64O2CAZCVE2HHXGBO7EFCF3C4GRFCDPD2NM53NKUCXUY"


def test_normalize_iscc_id():
assert ic.normalize("MAAGZTFQTTVIZPJR") == "ISCC:MAAGZTFQTTVIZPJR"
assert ic.iscc_normalize("MAAGZTFQTTVIZPJR") == "ISCC:MAAGZTFQTTVIZPJR"


def test_normalize_iscc_id_lower():
assert ic.normalize("maagztfqttvizpjr") == "ISCC:MAAGZTFQTTVIZPJR"
assert ic.iscc_normalize("maagztfqttvizpjr") == "ISCC:MAAGZTFQTTVIZPJR"


def test_normalize_iscc_id_mixed():
assert ic.normalize("MaaGZTfqttvizpjr") == "ISCC:MAAGZTFQTTVIZPJR"
assert ic.iscc_normalize("MaaGZTfqttvizpjr") == "ISCC:MAAGZTFQTTVIZPJR"


def test_normalize_iscc_id_scheme():
assert ic.normalize("ISCC:MAAGZTFQTTVIZPJR") == "ISCC:MAAGZTFQTTVIZPJR"
assert ic.iscc_normalize("ISCC:MAAGZTFQTTVIZPJR") == "ISCC:MAAGZTFQTTVIZPJR"


def test_normalize_iscc_id_scheme_lower():
assert ic.normalize("iscc:maagztfqttvizpjr") == "ISCC:MAAGZTFQTTVIZPJR"
assert ic.iscc_normalize("iscc:maagztfqttvizpjr") == "ISCC:MAAGZTFQTTVIZPJR"


def test_normalize_iscc_id_scheme_mixed():
assert ic.normalize("Iscc:Maagztfqttvizpjr") == "ISCC:MAAGZTFQTTVIZPJR"
assert ic.iscc_normalize("Iscc:Maagztfqttvizpjr") == "ISCC:MAAGZTFQTTVIZPJR"


def test_normalize_mf_base16_single():
assert ic.normalize("fcc0120016c017dac75fe4613") == "ISCC:EAAWYAL5VR274RQT"
assert ic.iscc_normalize("fcc0120016c017dac75fe4613") == "ISCC:EAAWYAL5VR274RQT"


def test_normalize_mf_base32_single():
assert ic.normalize("bzqasaalmaf62y5p6iyjq") == "ISCC:EAAWYAL5VR274RQT"
assert ic.iscc_normalize("bzqasaalmaf62y5p6iyjq") == "ISCC:EAAWYAL5VR274RQT"


def test_normalize_mf_base58btc_single():
assert ic.normalize("z4rHXCkYCB2k4V7uuk") == "ISCC:EAAWYAL5VR274RQT"
assert ic.iscc_normalize("z4rHXCkYCB2k4V7uuk") == "ISCC:EAAWYAL5VR274RQT"


def test_normalize_mf_base64_url_single():
assert ic.normalize("uzAEgAWwBfax1_kYT") == "ISCC:EAAWYAL5VR274RQT"
assert ic.iscc_normalize("uzAEgAWwBfax1_kYT") == "ISCC:EAAWYAL5VR274RQT"


def test_codec_normalize_raises():
code = ic.Code(ic.gen_meta_code("Hello", "World")["iscc"])
bad = "f" + (b"\xcc\xff" + code.bytes).hex()
with pytest.raises(ValueError):
ic.normalize(bad)
ic.iscc_normalize(bad)


def test_codec_encode_length_std_type():
Expand Down

0 comments on commit 94f70a8

Please sign in to comment.