From aa8e66a7eb9d325da49fceee5a9023a35fff5e32 Mon Sep 17 00:00:00 2001 From: Plastikmensch Date: Fri, 3 Nov 2023 04:16:50 +0100 Subject: [PATCH 1/6] Add highlight.js dependency highlight.js will be used to do the highlighting in the web interface Signed-off-by: Plastikmensch --- package.json | 3 ++- yarn.lock | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 40578504e822bb..ee5690ddd98ba6 100644 --- a/package.json +++ b/package.json @@ -50,9 +50,9 @@ "@renchap/compression-webpack-plugin": "^6.1.4", "@svgr/webpack": "^5.5.0", "abortcontroller-polyfill": "^1.7.5", - "atrament": "0.2.4", "arrow-key-navigation": "^1.2.0", "async-mutex": "^0.4.0", + "atrament": "0.2.4", "autoprefixer": "^10.4.14", "axios": "^1.4.0", "babel-loader": "^8.3.0", @@ -80,6 +80,7 @@ "font-awesome": "^4.7.0", "fuzzysort": "^2.0.4", "glob": "^10.2.6", + "highlight.js": "^11.9.0", "history": "^4.10.1", "hoist-non-react-statics": "^3.3.2", "http-link-header": "^1.1.1", diff --git a/yarn.lock b/yarn.lock index 1f23f26ddee4b5..b1759355f5100d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6617,6 +6617,11 @@ hasown@^2.0.0: dependencies: function-bind "^1.1.2" +highlight.js@^11.9.0: + version "11.9.0" + resolved "https://registry.yarnpkg.com/highlight.js/-/highlight.js-11.9.0.tgz#04ab9ee43b52a41a047432c8103e2158a1b8b5b0" + integrity sha512-fJ7cW7fQGCYAkgv4CPfwFHrfd/cLS4Hau96JuJ+ZTOWhjnhoeN1ub1tFmALm/+lW5z4WCAuAV9bm05AP0mS6Gw== + history@^4.10.1, history@^4.9.0: version "4.10.1" resolved "https://registry.yarnpkg.com/history/-/history-4.10.1.tgz#33371a65e3a83b267434e2b3f3b1b4c58aad4cf3" From 5d54afc988393f0d81a01ea608c4d7f3e2359f46 Mon Sep 17 00:00:00 2001 From: Plastikmensch Date: Fri, 3 Nov 2023 04:25:21 +0100 Subject: [PATCH 2/6] Add SCSS for highlighting Imports necessary styles from highlight.js Github style has been chosen as a default, because it is what most people will be familiar with. 
Signed-off-by: Plastikmensch --- app/javascript/flavours/glitch/styles/index.scss | 1 + app/javascript/flavours/glitch/styles/mastodon-light.scss | 1 + 2 files changed, 2 insertions(+) diff --git a/app/javascript/flavours/glitch/styles/index.scss b/app/javascript/flavours/glitch/styles/index.scss index 1cb913c8b832ec..205b9d64108aa0 100644 --- a/app/javascript/flavours/glitch/styles/index.scss +++ b/app/javascript/flavours/glitch/styles/index.scss @@ -22,3 +22,4 @@ @import 'rtl'; @import 'dashboard'; @import 'rich_text'; +@import 'node_modules/highlight.js/scss/github-dark'; diff --git a/app/javascript/flavours/glitch/styles/mastodon-light.scss b/app/javascript/flavours/glitch/styles/mastodon-light.scss index 8fc132651bdf67..40550254e099ec 100644 --- a/app/javascript/flavours/glitch/styles/mastodon-light.scss +++ b/app/javascript/flavours/glitch/styles/mastodon-light.scss @@ -1,3 +1,4 @@ @import 'mastodon-light/variables'; @import 'index'; @import 'mastodon-light/diff'; +@import 'node_modules/highlight.js/scss/github'; From f2758731071cc3281f1aaa5d01e83fb1f3cb0b31 Mon Sep 17 00:00:00 2001 From: Plastikmensch Date: Fri, 3 Nov 2023 04:50:31 +0100 Subject: [PATCH 3/6] Allow `data-codelang` attribute on code tags Allows users to set the language on code tags, which was previously discarded. Also allows this attribute on incoming toots. Signed-off-by: Plastikmensch --- app/lib/advanced_text_formatter.rb | 4 ++-- lib/sanitize_ext/sanitize_config.rb | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/app/lib/advanced_text_formatter.rb b/app/lib/advanced_text_formatter.rb index cdf1e2d9cd9394..1313d7ab4d511d 100644 --- a/app/lib/advanced_text_formatter.rb +++ b/app/lib/advanced_text_formatter.rb @@ -7,9 +7,9 @@ def initialize(options, &block) @format_link = block end - def block_code(code, _language) + def block_code(code, language) <<~HTML -
#{ERB::Util.h(code).gsub("\n", '
')}
+
#{ERB::Util.h(code).gsub("\n", '
')}
HTML end diff --git a/lib/sanitize_ext/sanitize_config.rb b/lib/sanitize_ext/sanitize_config.rb index 53508d3e45153e..3589febae2ebb6 100644 --- a/lib/sanitize_ext/sanitize_config.rb +++ b/lib/sanitize_ext/sanitize_config.rb @@ -84,6 +84,7 @@ module Config 'blockquote' => %w(cite), 'ol' => %w(start reversed), 'li' => %w(value), + 'code' => %w(data-codelang), }, add_attributes: { From a22c3c3a103633a963f283f7a46947177fabf351 Mon Sep 17 00:00:00 2001 From: Plastikmensch Date: Fri, 3 Nov 2023 05:06:00 +0100 Subject: [PATCH 4/6] Remove invalid `data-codelang` in backend Adds a new yml file containing all valid code languages which are supported by highlight.js This file has been created by a script as the list of supported languages is unreliable. This is not ideal, but highlight.js made it clear they won't add new languages natively. Instead additional language support is provided by third-party packages. If such a third-party package is installed, this file needs to be updated to support the new aliases. When an unsupported language is given, the `data-codelang` attribute is removed using a new transformer. Signed-off-by: Plastikmensch --- config/code-languages.yml | 373 ++++++++++++++++++++++++++++ lib/sanitize_ext/sanitize_config.rb | 12 + 2 files changed, 385 insertions(+) create mode 100644 config/code-languages.yml diff --git a/config/code-languages.yml b/config/code-languages.yml new file mode 100644 index 00000000000000..053de6a6d6465f --- /dev/null +++ b/config/code-languages.yml @@ -0,0 +1,373 @@ +# Note: Valid languages must not include spaces. +# When installing third-party languages, these have to be added here. 
+languages: + - 1c + - abnf + - accesslog + - actionscript + - as + - ada + - angelscript + - asc + - apache + - apacheconf + - applescript + - osascript + - arcade + - arduino + - ino + - armasm + - arm + - xml + - html + - xhtml + - rss + - atom + - xjb + - xsd + - xsl + - plist + - wsf + - svg + - asciidoc + - adoc + - aspectj + - autohotkey + - ahk + - autoit + - avrasm + - awk + - axapta + - x++ + - bash + - sh + - basic + - bnf + - brainfuck + - bf + - c + - h + - cal + - capnproto + - capnp + - ceylon + - clean + - icl + - dcl + - clojure + - clj + - edn + - clojure-repl + - cmake + - cmake.in + - coffeescript + - coffee + - cson + - iced + - coq + - cos + - cls + - cpp + - cc + - c++ + - h++ + - hpp + - hh + - hxx + - cxx + - crmsh + - crm + - pcmk + - crystal + - cr + - csharp + - cs + - c# + - csp + - css + - d + - markdown + - md + - mkdown + - mkd + - dart + - delphi + - dpr + - dfm + - pas + - pascal + - diff + - patch + - django + - jinja + - dns + - bind + - zone + - dockerfile + - docker + - dos + - bat + - cmd + - dsconfig + - dts + - dust + - dst + - ebnf + - elixir + - ex + - exs + - elm + - ruby + - rb + - gemspec + - podspec + - thor + - irb + - erb + - erlang-repl + - erlang + - erl + - excel + - xlsx + - xls + - fix + - flix + - fortran + - f90 + - f95 + - fsharp + - fs + - f# + - gams + - gms + - gauss + - gss + - gcode + - nc + - gherkin + - feature + - glsl + - gml + - go + - golang + - golo + - gradle + - graphql + - gql + - groovy + - haml + - handlebars + - hbs + - html.hbs + - html.handlebars + - htmlbars + - haskell + - hs + - haxe + - hx + - hsp + - http + - https + - hy + - hylang + - inform7 + - i7 + - ini + - toml + - irpf90 + - isbl + - java + - jsp + - javascript + - js + - jsx + - mjs + - cjs + - jboss-cli + - wildfly-cli + - json + - julia + - julia-repl + - jldoctest + - kotlin + - kt + - kts + - lasso + - ls + - lassoscript + - latex + - tex + - ldif + - leaf + - less + - lisp + - livecodeserver + - livescript + - ls + - 
llvm + - lsl + - lua + - makefile + - mk + - mak + - make + - mathematica + - mma + - wl + - matlab + - maxima + - mel + - mercury + - m + - moo + - mipsasm + - mips + - mizar + - perl + - pl + - pm + - mojolicious + - monkey + - moonscript + - moon + - n1ql + - nestedtext + - nt + - nginx + - nginxconf + - nim + - nix + - nixos + - node-repl + - nsis + - objectivec + - mm + - objc + - obj-c + - obj-c++ + - objective-c++ + - ocaml + - ml + - openscad + - scad + - oxygene + - parser3 + - pf + - pf.conf + - pgsql + - postgres + - postgresql + - php + - php-template + - plaintext + - text + - txt + - pony + - powershell + - pwsh + - ps + - ps1 + - processing + - pde + - profile + - prolog + - properties + - protobuf + - proto + - puppet + - pp + - purebasic + - pb + - pbi + - python + - py + - gyp + - ipython + - python-repl + - pycon + - q + - k + - kdb + - qml + - qt + - r + - reasonml + - re + - rib + - roboconf + - graph + - instances + - routeros + - mikrotik + - rsl + - ruleslanguage + - rust + - rs + - sas + - scala + - scheme + - scm + - scilab + - sci + - scss + - shell + - console + - shellsession + - smali + - smalltalk + - st + - sml + - ml + - sqf + - sql + - stan + - stanfuncs + - stata + - do + - ado + - step21 + - p21 + - step + - stp + - stylus + - styl + - subunit + - swift + - taggerscript + - yaml + - yml + - tap + - tcl + - tk + - thrift + - tp + - twig + - craftcms + - typescript + - ts + - tsx + - mts + - cts + - vala + - vbnet + - vb + - vbscript + - vbs + - vbscript-html + - verilog + - v + - sv + - svh + - vhdl + - vim + - wasm + - wren + - x86asm + - xl + - tao + - xquery + - xpath + - xq + - xqm + - zephir + - zep diff --git a/lib/sanitize_ext/sanitize_config.rb b/lib/sanitize_ext/sanitize_config.rb index 3589febae2ebb6..99705299d2cbac 100644 --- a/lib/sanitize_ext/sanitize_config.rb +++ b/lib/sanitize_ext/sanitize_config.rb @@ -21,6 +21,16 @@ module Config gemini ).freeze + # Valid code languages for highlight.js + VALID_LANGUAGES = 
YAML.load_file(File.expand_path('../../config/code-languages.yml', __dir__))['languages'].freeze + + DATA_LANG_TRANSFORMER = lambda do |env| + return unless env[:node_name] == 'code' && env[:node]['data-codelang'] + + node = env[:node] + node.remove_attribute('data-codelang') unless VALID_LANGUAGES.include?(node['data-codelang'].downcase) + end + CLASS_WHITELIST_TRANSFORMER = lambda do |env| node = env[:node] class_list = node['class']&.split(/[\t\n\f\r ]/) @@ -104,6 +114,7 @@ module Config IMG_TAG_TRANSFORMER, TRANSLATE_TRANSFORMER, UNSUPPORTED_HREF_TRANSFORMER, + DATA_LANG_TRANSFORMER, ] ) @@ -170,6 +181,7 @@ module Config UNSUPPORTED_HREF_TRANSFORMER, LINK_REL_TRANSFORMER, LINK_TARGET_TRANSFORMER, + DATA_LANG_TRANSFORMER, ] ) end From 57a7a17567e70c34fd45bad678410e0bb396a478 Mon Sep 17 00:00:00 2001 From: Plastikmensch Date: Fri, 3 Nov 2023 05:18:26 +0100 Subject: [PATCH 5/6] Add Syntax highlighting to web UI Adds a new function to add syntax highlighting on `contentHtml` Also supports translated content. 
Signed-off-by: Plastikmensch --- .../glitch/components/status_content.jsx | 3 +- app/javascript/flavours/glitch/utils/html.js | 52 +++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/app/javascript/flavours/glitch/components/status_content.jsx b/app/javascript/flavours/glitch/components/status_content.jsx index 67e5d53d830ce1..768f2bb140ab81 100644 --- a/app/javascript/flavours/glitch/components/status_content.jsx +++ b/app/javascript/flavours/glitch/components/status_content.jsx @@ -11,6 +11,7 @@ import { connect } from 'react-redux'; import { Icon } from 'flavours/glitch/components/icon'; import { autoPlayGif, languages as preloadedLanguages } from 'flavours/glitch/initial_state'; +import { highlightCode } from 'flavours/glitch/utils/html'; import { decode as decodeIDNA } from 'flavours/glitch/utils/idna'; import Permalink from './permalink'; @@ -345,7 +346,7 @@ class StatusContent extends PureComponent { const targetLanguages = this.props.languages?.get(status.get('language') || 'und'); const renderTranslate = this.props.onTranslate && this.context.identity.signedIn && ['public', 'unlisted'].includes(status.get('visibility')) && status.get('search_index').trim().length > 0 && targetLanguages?.includes(contentLocale); - const content = { __html: statusContent ?? getStatusContent(status) }; + const content = { __html: highlightCode(statusContent ?? 
getStatusContent(status)) }; const spoilerContent = { __html: status.getIn(['translation', 'spoilerHtml']) || status.get('spoilerHtml') }; const language = status.getIn(['translation', 'language']) || status.get('language'); const classNames = classnames('status__content', { diff --git a/app/javascript/flavours/glitch/utils/html.js b/app/javascript/flavours/glitch/utils/html.js index 247e98c88a7f31..b20434be99178e 100644 --- a/app/javascript/flavours/glitch/utils/html.js +++ b/app/javascript/flavours/glitch/utils/html.js @@ -1,6 +1,58 @@ +import highlightjs from 'highlight.js'; + // NB: This function can still return unsafe HTML export const unescapeHTML = (html) => { const wrapper = document.createElement('div'); wrapper.innerHTML = html.replace(//g, '\n').replace(/<\/p>

/g, '\n\n').replace(/<[^>]*>/g, ''); return wrapper.textContent; }; + +/** + * Highlights code in code tags.\ + * Uses highlight.js to convert content inside code tags to span elements with class attributes + * @param {string} content - String containing html code tags + * @returns {string} content with highlighted code inside code tags, or content if not highlighted + */ +export const highlightCode = (content) => { + // highlightJS complains when unescaped html is given + highlightjs.configure({ ignoreUnescapedHTML: true }); + + // Create a new temporary element to work on + const wrapper = document.createElement('div'); + wrapper.innerHTML = content; + + // Get code elements and run highlightJS on each. + wrapper.querySelectorAll('code') + .forEach((code) => { + // Get language from data attribute containing code language of code element + let lang = highlightjs.getLanguage(code.dataset.codelang); + + // Check if lang is a valid language + if (lang !== undefined) { + // Set codelang as class attribute, since highlightElement cannot be given a language + // highlightJS will read this attribute and use it to highlight in the proper language + code.setAttribute('class', code.dataset.codelang); + + // Set title attribute to language name, i.e. "js" will become "Javascript" + code.setAttribute('title', lang.name); + + // Replace
 as highlightJS removes them, messing up formatting + let brTags = Array.from(code.getElementsByTagName('br')); + for (let br of brTags) { + br.replaceWith('\n'); + } + + // Highlight the code element + highlightjs.highlightElement(code); + + // highlightJS adds own class attribute, remove it again to not mess up styling + code.removeAttribute('class'); + } else { + // Remove data attribute as it's not a valid language. + delete code.dataset.codelang; + } + }); + + // return content with highlighted code + return wrapper.innerHTML; +}; From 5416d99a65365cbb7d9327c4ac529aa72fd50658 Mon Sep 17 00:00:00 2001 From: Plastikmensch Date: Fri, 3 Nov 2023 05:25:12 +0100 Subject: [PATCH 6/6] Update specs Add tests to make sure valid `data-codelang` attributes are kept and invalid ones removed. Signed-off-by: Plastikmensch --- spec/lib/advanced_text_formatter_spec.rb | 8 ++++++++ spec/lib/sanitize_config_spec.rb | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/spec/lib/advanced_text_formatter_spec.rb b/spec/lib/advanced_text_formatter_spec.rb index f92385219615fe..54602f77a2a45e 100644 --- a/spec/lib/advanced_text_formatter_spec.rb +++ b/spec/lib/advanced_text_formatter_spec.rb @@ -50,6 +50,14 @@ it 'does not format links' do expect(subject).to include 'return 0; // https://joinmastodon.org/foo' end + + context 'with valid language' do + let(:text) { "test\n\n```c++\nint main(void) {\n return 0; // https://joinmastodon.org/foo\n}\n```\n" } + + it 'formats code using <pre> and <code> with data containing set language' do
+            expect(subject).to include '<pre><code data-codelang="c++">int main'
+          end
+        end
       end
 
       context 'with a link in inline code using backticks' do
diff --git a/spec/lib/sanitize_config_spec.rb b/spec/lib/sanitize_config_spec.rb
index cc9916bfd40e4a..1a65fb1aa0ccd0 100644
--- a/spec/lib/sanitize_config_spec.rb
+++ b/spec/lib/sanitize_config_spec.rb
@@ -55,6 +55,14 @@
     it 'keeps title in abbr' do
       expect(Sanitize.fragment('HTML', subject)).to eq 'HTML'
     end
+
+    it 'keeps data-codelang attribute in code' do
+      expect(Sanitize.fragment('int main(void) { return 0; // https://joinmastodon.org/foo }', subject)).to eq 'int main(void) { return 0; // https://joinmastodon.org/foo }'
+    end
+
+    it 'removes data-codelang attribute in code when unsupported' do
+      expect(Sanitize.fragment('int main(void) { return 0; // https://joinmastodon.org/foo }', subject)).to eq 'int main(void) { return 0; // https://joinmastodon.org/foo }'
+    end
   end
 
   describe '::MASTODON_OUTGOING' do