Skip to content

Commit

Permalink
BUG: Fix factorize to ensure proper use of null_encoding parameter and backwards compatibility maintained
Browse files Browse the repository at this point in the history
  • Loading branch information
asharmalik19 committed Jan 29, 2025
1 parent 166405d commit 66330ee
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1207,9 +1207,15 @@ def factorize(
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
data = data.cast(pa.int64())

if pa.types.is_dictionary(data.type) and null_encoding == "encode":
data = data.cast(data.type.value_type)
encoded = data.dictionary_encode(null_encoding=null_encoding)
if pa.types.is_dictionary(data.type):
if null_encoding == "encode":
# dictionary_encode does nothing when given an already dictionary-encoded array, so cast back to the value type first
data = data.cast(data.type.value_type)
encoded = data.dictionary_encode(null_encoding=null_encoding)
else:
encoded = data
else:
encoded = data.dictionary_encode(null_encoding=null_encoding)
if encoded.length() == 0:
indices = np.array([], dtype=np.intp)
uniques = type(self)(pa.chunked_array([], type=encoded.type.value_type))
Expand Down

0 comments on commit 66330ee

Please sign in to comment.