Skip to content

Commit

Permalink
remove decodes for unicode string constants
Browse files Browse the repository at this point in the history
Summary: Remove the `b"<string>".decode()` handling of non-ASCII Unicode literals by using hex escapes instead of octal escapes for non-ASCII characters.

Reviewed By: vitaut

Differential Revision: D68465372

fbshipit-source-id: a373078ebcccc1efd7f608d032bab2d81fbcdf3b
  • Loading branch information
ahilger authored and facebook-github-bot committed Jan 22, 2025
1 parent a5206e8 commit 905630f
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1287,8 +1287,7 @@ class python_mstch_const_value : public mstch_const_value {
{"value:py3_enum_value_name",
&python_mstch_const_value::py3_enum_value_name},
{"value:py3_binary?", &python_mstch_const_value::is_binary},
{"value:contains_unicode?",
&python_mstch_const_value::contains_unicode},
{"value:unicode_value", &python_mstch_const_value::unicode_value},
{"value:const_enum_type",
&python_mstch_const_value::const_enum_type},
{"value:value_for_bool?",
Expand All @@ -1302,21 +1301,12 @@ class python_mstch_const_value : public mstch_const_value {
});
}

// A unicode string that actually contains unicode requires special handling
// because compiler can't directly render it as python unicode literal.
mstch::node contains_unicode() {
if (!is_nonbinary_string()) {
return false;
mstch::node unicode_value() {
if (type_ != cv::CV_STRING) {
return {};
}
const std::string& str = const_value_->get_string();
return std::any_of(
str.begin(), str.end(), [](signed char c) { return c < 0; });
}

bool is_nonbinary_string() {
auto& ttype = const_value_->ttype();
return type_ == cv::CV_STRING && ttype &&
ttype->get_true_type()->is_string();
return get_escaped_string<nonascii_handling::no_escape>(
const_value_->get_string());
}

mstch::node is_binary() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,8 @@ file.
}}{{/value:const_struct?}}{{!
}}{{^value:const_struct?}}{{!
}}{{#value:string?}}{{!
}}{{^value:contains_unicode?}}{{!
}}{{#value:py3_binary?}}b{{/value:py3_binary?}}"{{value:string_value}}"{{!
}}{{/value:contains_unicode?}}{{!
}}{{#value:contains_unicode?}}{{!
}}b"{{value:string_value}}".decode(){{!
}}{{/value:contains_unicode?}}{{!
}}{{#value:py3_binary?}}b"{{value:string_value}}"{{/value:py3_binary?}}{{!
}}{{^value:py3_binary?}}"{{value:unicode_value}}"{{/value:py3_binary?}}{{!
}}{{/value:string?}}{{!
}}{{#value:map?}}_fbthrift_python_types.Map({{!
}}{{#value:map_key_type}}{{> types/typeinfo }}{{/value:map_key_type}}, {{!
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -780,9 +780,9 @@ def _to_py_deprecated(self):

char2ascii = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_i32, { "'": 39, "\"": 34, "\\": 92, "a": 97})

escaped_strings = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("\001", "\037", " ", "'", "\"", "\n", "\r", "\011", "a", b"\302\253".decode(), "j", b"\302\246".decode(), "ayyy", b"\302\253yyy".decode(), "jyyy", b"\302\246yyy".decode(), "zzza", b"zzz\302\253".decode(), "zzzj", b"zzz\302\246".decode(), "zzzayyy", b"zzz\302\253yyy".decode(), "zzzjyyy", b"zzz\302\246yyy".decode(), ))
escaped_strings = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("\001", "\037", " ", "'", "\"", "\n", "\r", "\011", "a", "«", "j", "¦", "ayyy", "«yyy", "jyyy", "¦yyy", "zzza", "zzz«", "zzzj", "zzz¦", "zzzayyy", "zzz«yyy", "zzzjyyy", "zzz¦yyy", ))

unicode_list = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("Bulgaria", "Benin", b"Saint Barth\303\251lemy".decode(), ))
unicode_list = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("Bulgaria", "Benin", "Saint Barthélemy", ))

false_c = False

Expand Down Expand Up @@ -816,7 +816,7 @@ def _to_py_deprecated(self):

empty_string_string_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { })

unicode_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { "BG": "Bulgaria", "BH": "Bahrain", b"B\303\211".decode(): b"Saint Barth\303\251lemy".decode()})
unicode_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { "BG": "Bulgaria", "BH": "Bahrain", "BÉ": "Saint Barthélemy"})

maxIntDec = 9223372036854775807

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -804,9 +804,9 @@ def _fbthrift_metadata__struct_union2():

char2ascii = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_i32, { "'": 39, "\"": 34, "\\": 92, "a": 97})

escaped_strings = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("\001", "\037", " ", "'", "\"", "\n", "\r", "\011", "a", b"\302\253".decode(), "j", b"\302\246".decode(), "ayyy", b"\302\253yyy".decode(), "jyyy", b"\302\246yyy".decode(), "zzza", b"zzz\302\253".decode(), "zzzj", b"zzz\302\246".decode(), "zzzayyy", b"zzz\302\253yyy".decode(), "zzzjyyy", b"zzz\302\246yyy".decode(), ))
escaped_strings = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("\001", "\037", " ", "'", "\"", "\n", "\r", "\011", "a", "«", "j", "¦", "ayyy", "«yyy", "jyyy", "¦yyy", "zzza", "zzz«", "zzzj", "zzz¦", "zzzayyy", "zzz«yyy", "zzzjyyy", "zzz¦yyy", ))

unicode_list = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("Bulgaria", "Benin", b"Saint Barth\303\251lemy".decode(), ))
unicode_list = _fbthrift_python_types.List(_fbthrift_python_types.typeinfo_string, ("Bulgaria", "Benin", "Saint Barthélemy", ))

false_c = False

Expand Down Expand Up @@ -840,7 +840,7 @@ def _fbthrift_metadata__struct_union2():

empty_string_string_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { })

unicode_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { "BG": "Bulgaria", "BH": "Bahrain", b"B\303\211".decode(): b"Saint Barth\303\251lemy".decode()})
unicode_map = _fbthrift_python_types.Map(_fbthrift_python_types.typeinfo_string, _fbthrift_python_types.typeinfo_string, { "BG": "Bulgaria", "BH": "Bahrain", "BÉ": "Saint Barthélemy"})

maxIntDec = 9223372036854775807

Expand Down

0 comments on commit 905630f

Please sign in to comment.