Skip to content

Commit

Permalink
Add multiformats support for codec convenience functions
Browse files Browse the repository at this point in the history
  • Loading branch information
titusz committed Nov 18, 2024
1 parent e1c43a6 commit b16072b
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- Set CDC index to size if size is smaller than minimum chunk size
- Added Python 3.13 support
- Added .env to .gitignore
- Added multiformats support for codec convenience functions
- Updated locked dependencies
- Removed EOL Python 3.8/3.9 support

Expand Down
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- Set CDC index to size if size is smaller than minimum chunk size
- Added Python 3.13 support
- Added .env to .gitignore
- Added multiformats support for codec convenience functions
- Updated locked dependencies
- Removed EOL Python 3.8/3.9 support

Expand Down
57 changes: 36 additions & 21 deletions iscc_core/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,33 @@ def decode_base32hex(code):
return decode_base32(b32)


def normalize_multiformat(iscc_code):
    # type: (str) -> str
    """
    Transcode a multiformat encoded ISCC to standard base32 encoding.

    The input is first passed through `iscc_clean`. If the cleaned string starts
    with one of the supported multibase prefixes (f, b, v, z, u), the remainder is
    decoded with the matching decoder, the leading multicodec prefix is verified
    and stripped, and the payload is returned re-encoded via `encode_base32`.
    Otherwise (including an empty string) the cleaned input is returned unchanged.

    :param str iscc_code: ISCC string, either multiformat or standard encoded.
    :return: Base32 encoded ISCC, or the cleaned input if it is not multiformat encoded.
    :rtype: str
    :raises ValueError: If the decoded data does not start with `MC_PREFIX`.
    """
    decoders = {
        MULTIBASE.base16.value: bytes.fromhex,  # f
        MULTIBASE.base32.value: decode_base32,  # b
        MULTIBASE.base32hex.value: decode_base32hex,  # v
        MULTIBASE.base58btc.value: base58.b58decode,  # z
        MULTIBASE.base64url.value: decode_base64,  # u
    }

    # Clean the ISCC code first
    iscc_code = iscc_clean(iscc_code)

    # Check for multibase prefix. A one-character slice is used instead of
    # indexing so that an empty string falls through instead of raising IndexError.
    decoder = decoders.get(iscc_code[:1])
    if decoder is None:
        return iscc_code

    decoded = decoder(iscc_code[1:])
    if not decoded.startswith(MC_PREFIX):
        raise ValueError(f"Malformed multiformat codec: {decoded[:2]}")
    # Strip the multicodec prefix (assumes MC_PREFIX is two bytes long - TODO confirm)
    return encode_base32(decoded[2:])


def iscc_decompose(iscc_code):
# type: (str) -> List[str]
"""
Expand All @@ -333,9 +360,10 @@ def iscc_decompose(iscc_code):
A valid ISCC sequence is a string concatenation of ISCC-UNITS optionally separated
by a hyphen.
"""
iscc_code = iscc_clean(iscc_code)
components = []
# Handle multiformat encoding first
iscc_code = normalize_multiformat(iscc_code)

components = []
raw_code = decode_base32(iscc_code)
while raw_code:
mt, st, vs, ln, body = decode_header(raw_code)
Expand Down Expand Up @@ -400,26 +428,13 @@ def iscc_normalize(iscc_code):
"""
from iscc_core.iscc_code import gen_iscc_code_v0

decoders = {
MULTIBASE.base16.value: bytes.fromhex, # f
MULTIBASE.base32.value: decode_base32, # b
MULTIBASE.base32hex.value: decode_base32hex, # v
MULTIBASE.base58btc.value: base58.b58decode, # z
MULTIBASE.base64url.value: decode_base64, # u
}
# Handle multiformat encoding first
iscc_code = normalize_multiformat(iscc_code)

# Transcode to base32 if <multibase><multicodec> encoded
multibase_prefix = iscc_code[0]
if multibase_prefix in decoders.keys():
decoder = decoders[multibase_prefix]
decoded = decoder(iscc_code[1:])
if not decoded.startswith(MC_PREFIX):
raise ValueError(f"Malformed multiformat codec: {decoded[:2]}")
iscc_code = encode_base32(decoded[2:])
else:
prefix = iscc_code.upper().replace("ISCC:", "")[:2]
if prefix not in PREFIXES:
raise ValueError(f"ISCC starts with invalid prefix {prefix}")
# Validate prefix
prefix = iscc_code.upper()[:2]
if prefix not in PREFIXES:
raise ValueError(f"ISCC starts with invalid prefix {prefix}")

decomposed = iscc_decompose(iscc_code)
recomposed = gen_iscc_code_v0(decomposed)["iscc"] if len(decomposed) >= 2 else decomposed[0]
Expand Down
23 changes: 23 additions & 0 deletions tests/test_multiformats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
import iscc_core as ic

# Reference ISCC with an uppercase "ISCC:" scheme prefix (presumably the canonical form)
CANONICAL = "ISCC:EAAWFH3PX3MCYB6N"
# Multiformat variant with a leading "v" (presumably the base32hex multibase prefix);
# the tests below expect it to decode to the same data as CANONICAL
MF_B32H = "vpg0i00b2jtnrtm1c0v6g"
# Same string as MF_B32H, preceded by a lowercase "iscc:" scheme prefix
MF_B32H_P = "iscc:vpg0i00b2jtnrtm1c0v6g"
# Code object built from CANONICAL; supplies the expected values in the tests below
ISCC_OBJ = ic.Code(CANONICAL)


def test_iscc_clean():
    """Cleaning keeps the bare multiformat code and drops the `iscc:` scheme prefix."""
    for raw in (MF_B32H, MF_B32H_P):
        cleaned = ic.iscc_clean(raw)
        assert cleaned == MF_B32H


def test_iscc_decode():
    """Multiformat input (with or without scheme) decodes to the reference header and body."""
    body_bytes = ISCC_OBJ._body.tobytes()
    expected = ISCC_OBJ._head + (body_bytes,)
    for encoded in (MF_B32H, MF_B32H_P):
        assert ic.iscc_decode(encoded) == expected


def test_iscc_decompose():
    """Multiformat input decomposes into the same units as the reference ISCC URI."""
    reference = ic.iscc_decompose(ISCC_OBJ.uri)
    for encoded in (MF_B32H, MF_B32H_P):
        units = ic.iscc_decompose(encoded)
        assert units == reference


0 comments on commit b16072b

Please sign in to comment.