Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement RFC8187 filenames for Content-Disposition in MultiPart #12413

Merged
merged 2 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
Expand All @@ -33,6 +35,7 @@
import java.util.Objects;
import java.util.Queue;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Pattern;

import org.eclipse.jetty.io.Content;
import org.eclipse.jetty.io.content.ByteBufferContentSource;
Expand All @@ -43,6 +46,7 @@
import org.eclipse.jetty.util.SearchPattern;
import org.eclipse.jetty.util.StaticException;
import org.eclipse.jetty.util.StringUtil;
import org.eclipse.jetty.util.UrlEncoded;
import org.eclipse.jetty.util.Utf8StringBuilder;
import org.eclipse.jetty.util.thread.AutoLock;
import org.eclipse.jetty.util.thread.SerializedInvoker;
Expand Down Expand Up @@ -75,11 +79,59 @@ public class MultiPart
.allowEscapeOnlyForQuotes()
.build();
private static final int MAX_BOUNDARY_LENGTH = 70;
private static final Pattern NON_ASCII_PATTERN = Pattern.compile("[^\\x20-\\x7E]");
private static final Pattern WINDOWS_FILENAME = Pattern.compile(".??[a-zA-Z]:\\\\[^\\\\].*");

private MultiPart()
{
}

/**
* Encode a {@code Content-Disposition} file name according to RFC8187 using {@link java.nio.charset.StandardCharsets#UTF_8}
* @param fileName The filename to encode
* @return The encoding of the filename suitable for a {@code Content-Disposition} header
*/
public static String encodeContentDispositionFileName(String fileName)
{
return encodeContentDispositionFileName(fileName, UTF_8);
}

/**
* Encode a {@code Content-Disposition} file name according to RFC8187
* @param fileName The filename to encode
* @param charset The charset to encode with
* @return The encoding of the filename suitable for a {@code Content-Disposition} header
*/
static String encodeContentDispositionFileName(String fileName, Charset charset)
{
String encodedFileName = UrlEncoded.encodeString(fileName, charset);
if (encodedFileName.equals(fileName))
return "filename=\"" + fileName + "\"";

encodedFileName = encodedFileName.replace("+", "%20");
String asciiFilename = NON_ASCII_PATTERN.matcher(fileName).replaceAll("_");
return String.format("filename=\"%s\"; filename*=%s''%s", asciiFilename, charset.name(), encodedFileName);
}

/**
* Decode a {@code Content-Disposition} file name according to RFC8187
* @param encoded The encoded filename
* @return The decoded filename
* @throws IllegalArgumentException If the encoded string is of the wrong format
* @throws IllegalCharsetNameException If the charset name is illegal
* @throws UnsupportedCharsetException If the charset is unsupported
*/
static String decodeContentDispositionFileName(String encoded) throws IllegalArgumentException,
IllegalCharsetNameException,
UnsupportedCharsetException
{
int i = encoded.indexOf("''");
if (i == -1)
throw new IllegalArgumentException("invalid encoding");
Charset charset = Charset.forName(encoded.substring(0, i));
return UrlEncoded.decodeString(encoded, i + 2, encoded.length() - i - 2, charset);
}

/**
* <p>Extracts the value of the {@code boundary} parameter
* from the {@code Content-Type} header value, or returns
Expand Down Expand Up @@ -1746,6 +1798,8 @@ public void onPartHeader(String headerName, String headerValue)
{
String namePrefix = "name=";
String fileNamePrefix = "filename=";
String encodedFileNamePrefix = "filename*=";
boolean encoded = false;
for (Iterator<String> tokens = CONTENT_DISPOSITION_TOKENIZER.tokenize(headerValue); tokens.hasNext();)
{
String token = tokens.next();
Expand All @@ -1758,19 +1812,30 @@ public void onPartHeader(String headerName, String headerValue)
}
else if (lowerToken.startsWith(fileNamePrefix))
{
fileName = fileNameValue(token);
if (!encoded)
fileName = fileNameValue(token.substring(fileNamePrefix.length()).trim());
}
else if (lowerToken.startsWith(encodedFileNamePrefix))
{
try
{
fileName = fileNameValue(decodeContentDispositionFileName(token.substring(encodedFileNamePrefix.length()).trim()));
encoded = true;
}
catch (Exception x)
{
if (LOG.isTraceEnabled())
LOG.trace("ignored", x);
}
}
}
}
fields.add(new HttpField(headerName, headerValue));
}

private String fileNameValue(String token)
private String fileNameValue(String value)
{
int idx = token.indexOf('=');
String value = token.substring(idx + 1).trim();

if (value.matches(".??[a-zA-Z]:\\\\[^\\\\].*"))
if (WINDOWS_FILENAME.matcher(value).matches())
{
// Matched incorrectly escaped IE filenames that have the whole
// path, as in C:\foo, just strip any leading & trailing quotes.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ protected HttpFields customizePartHeaders(MultiPart.Part part)
value += "; name=" + QuotedCSV.quote(name);
String fileName = part.getFileName();
if (fileName != null)
value += "; filename=" + QuotedCSV.quote(fileName);
value += "; " + MultiPart.encodeContentDispositionFileName(fileName);

return HttpFields.build(headers).put(HttpHeader.CONTENT_DISPOSITION, value);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import org.eclipse.jetty.util.Callback;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import static java.nio.charset.StandardCharsets.ISO_8859_1;
Expand Down Expand Up @@ -1326,10 +1325,32 @@ public void testPartWithWindowsFileName() throws Exception
}
}

// TODO We need to implement RFC8187 to lookfor the filename*= attribute. Meanwhile, it appears
// that escaping is only done for quote in these filenames.
@Test
@Disabled
public void testCorrectlyEncodedFilename() throws Exception
{
AsyncContent source = new TestContent();
MultiPartFormData.Parser formData = new MultiPartFormData.Parser("AaB03x");
formData.setFilesDirectory(_tmpDir);
formData.setMaxMemoryFileSize(-1);

String contents = """
--AaB03x\r
Content-Disposition: form-data; name="stuff"; filename="file.txt"; filename*=UTF-8''file%20%E2%9C%93.txt\r
Content-Type: text/plain\r
\r
stuffaaa\r
--AaB03x--\r
""";
Content.Sink.write(source, true, contents, Callback.NOOP);
try (MultiPartFormData.Parts parts = formData.parse(source).get(5, TimeUnit.SECONDS))
{
assertThat(parts.size(), is(1));
MultiPart.Part part = parts.get(0);
assertThat(part.getFileName(), is("file ✓.txt"));
}
}

@Test
public void testCorrectlyEncodedMSFilename() throws Exception
{
AsyncContent source = new TestContent();
Expand All @@ -1339,7 +1360,7 @@ public void testCorrectlyEncodedMSFilename() throws Exception

String contents = """
--AaB03x\r
content-disposition: form-data; name="stuff"; filename="c:\\\\this\\\\really\\\\is\\\\some\\\\path\\\\to\\\\a\\\\file.txt"\r
Content-Disposition: form-data; name="stuff"; filename="c:\\this\\really\\is\\some\\path\\to\\a\\file.txt"\r
Content-Type: text/plain\r
\r
stuffaaa\r
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
package org.eclipse.jetty.http;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
Expand All @@ -22,7 +24,6 @@

import org.eclipse.jetty.io.Content;
import org.eclipse.jetty.util.BufferUtil;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;
Expand All @@ -33,6 +34,7 @@
import static org.hamcrest.Matchers.is;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

public class MultiPartTest
Expand Down Expand Up @@ -371,48 +373,6 @@ public void testSimple() throws Exception
assertEquals(0, data.remaining());
}

/**
* Whitespace before boundaries.
*
* @see MultiPartCompliance.Violation#WHITESPACE_BEFORE_BOUNDARY
*/
@Test
@Disabled
public void testWhitespaceBeforeBoundary() throws Exception
{
TestPartsListener listener = new TestPartsListener();
MultiPart.Parser parser = new MultiPart.Parser("BOUNDARY", listener);

ByteBuffer data = BufferUtil.toBuffer("""
preamble\r
--BOUNDARY\r
name: value\r
\r
Hello\r
--BOUNDARY\r
powerLevel: 9001\r
\r
secondary\r
content\r
--BOUNDARY--epi\r
logue\r
""");

parser.parse(Content.Chunk.from(data, true));

assertEquals(2, listener.parts.size());

MultiPart.Part part1 = listener.parts.get(0);
assertEquals("value", part1.getHeaders().get("name"));
assertEquals("Hello", Content.Source.asString(part1.getContentSource()));

MultiPart.Part part2 = listener.parts.get(1);
assertEquals("9001", part2.getHeaders().get("powerLevel"));
assertEquals("secondary\r\ncontent", Content.Source.asString(part2.getContentSource()));

assertEquals(0, data.remaining());
}

@Test
public void testLineFeed() throws Exception
{
Expand Down Expand Up @@ -659,6 +619,22 @@ public void testBadHeaderNames(String badHeader)
assertThat(listener.failure.getMessage(), containsStringIgnoringCase("invalid header name"));
}

@Test
public void testEncodeContentDispositionFilename()
{
assertThat(MultiPart.encodeContentDispositionFileName("test.xml"), is("filename=\"test.xml\""));
assertThat(MultiPart.encodeContentDispositionFileName("test ✓.xml"), is("filename=\"test _.xml\"; filename*=UTF-8''test%20%E2%9C%93.xml"));
assertThat(MultiPart.encodeContentDispositionFileName("test ✓.xml", StandardCharsets.UTF_16), is("filename=\"test _.xml\"; filename*=UTF-16''╆䔥䙆┰ぴ┰づ┰び┰ぴ┰〫┲㜥ㄳ┰〮┰へ┰ね┰ぬ"));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at UTF-16 in the raw like this is so weird.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, I think it is OK to have like that.... but it is strange.

}

@Test
public void testDecodeContentDispositionFilename()
{
assertThat(MultiPart.decodeContentDispositionFileName("UTF-8''test%20%E2%9C%93.xml"), is("test ✓.xml"));
assertThat(MultiPart.decodeContentDispositionFileName("utf-8''test%20%E2%9C%93.xml"), is("test ✓.xml"));
assertThrows(UnsupportedCharsetException.class, () -> MultiPart.decodeContentDispositionFileName("unknown-42''test.xml"));
}

private static byte[] randomBytes(int length)
{
byte[] random = new byte[length];
Expand Down
Loading