From ae34a9c80eb124ab8040158a9d02aa51fe825794 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 8 Oct 2024 14:01:52 +0100 Subject: [PATCH 1/8] template/text.Template execution methods: support reading arbitrary content --- go/ql/lib/ext/text.template.model.yml | 4 +- .../go/dataflow/internal/DataFlowPrivate.qll | 55 +++++++++++++++++++ .../go/dataflow/internal/DataFlowUtil.qll | 13 +++++ .../dataflow/internal/TaintTrackingUtil.qll | 24 +------- .../go/frameworks/stdlib/TextTemplate.qll | 40 ++++++++++++++ .../go/frameworks/serialization/test.expected | 0 .../go/frameworks/serialization/test.ql | 9 +++ .../frameworks/serialization/texttemplate.go | 50 +++++++++++++++++ 8 files changed, 171 insertions(+), 24 deletions(-) create mode 100644 go/ql/test/library-tests/semmle/go/frameworks/serialization/test.expected create mode 100644 go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql create mode 100644 go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go diff --git a/go/ql/lib/ext/text.template.model.yml b/go/ql/lib/ext/text.template.model.yml index 669af3a8854f..3ff0321a43c2 100644 --- a/go/ql/lib/ext/text.template.model.yml +++ b/go/ql/lib/ext/text.template.model.yml @@ -7,5 +7,5 @@ extensions: - ["text/template", "", False, "HTMLEscapeString", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] - ["text/template", "", False, "JSEscape", "", "", "Argument[1]", "Argument[0]", "taint", "manual"] - ["text/template", "", False, "JSEscapeString", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] - - ["text/template", "Template", True, "Execute", "", "", "Argument[1]", "Argument[0]", "taint", "manual"] - - ["text/template", "Template", True, "ExecuteTemplate", "", "", "Argument[2]", "Argument[0]", "taint", "manual"] +# - ["text/template", "Template", True, "Execute", "", "", "Argument[1]", "Argument[0]", "taint", "manual"] # Implemented in QL to provide an arbitrary content read from the input. +# - ["text/template", "Template", True, "ExecuteTemplate", "", "", "Argument[2]", "Argument[0]", "taint", "manual"] # Implemented in QL to provide an arbitrary content read from the input. diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll index 2fcbf2d350f2..6fe005385b24 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll @@ -165,6 +165,53 @@ predicate storeStep(Node node1, ContentSet c, Node node2) { containerStoreStep(node1, node2, c) } +/** + * Gets a `DataFlow::ContentSet` containing a single `Content` appropriate + * for reading a field, element, map value or channel message of type `containerType`. + */ +DataFlow::ContentSet getContentForType(Type containerType) { + containerType instanceof ArrayType and + result instanceof DataFlow::ArrayContent + or + containerType instanceof SliceType and + result instanceof DataFlow::ArrayContent + or + containerType instanceof ChanType and + result instanceof DataFlow::CollectionContent + or + containerType instanceof MapType and + result instanceof DataFlow::MapValueContent + or + result.(DataFlow::PointerContent).getPointerType() = containerType + or + exists(Field f | f = containerType.(StructType).getField(_) | + result.(DataFlow::FieldContent).getField() = f + ) +} + +/** + * Gets the type of an array/slice element, channel value, map value, + * pointer base type or named-type underlying type relating to `containerType`. + */ +Type getElementType(Type containerType) { + result = containerType.(ArrayType).getElementType() or + result = containerType.(SliceType).getElementType() or + result = containerType.(ChanType).getElementType() or + result = containerType.(MapType).getValueType() or + result = containerType.(PointerType).getPointerType() or + result = containerType.(NamedType).getUnderlyingType() +} + +/** + * Gets the type of an array/slice element, channel value, map value, + * pointer base type, named-type underlying type or struct field type + * relating to `containerType`. + */ +Type getAnElementOrFieldType(Type containerType) { + result = getElementType(containerType) or + result = containerType.(StructType).getField(_).getType() +} + /** * Holds if data can flow from `node1` to `node2` via a read of `c`. * Thus, `node1` references an object with a content `c` whose value ends up in @@ -184,6 +231,14 @@ predicate readStep(Node node1, ContentSet c, Node node2) { node2.(FlowSummaryNode).getSummaryNode()) or containerReadStep(node1, node2, c) + or + exists(Type containerType | + any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and + getAnElementOrFieldType*(node1.getType()) = containerType + | + c = getContentForType(containerType) and + node1 = node2 + ) } /** diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll index 68ffe57f5f59..3d66f4b96e07 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -7,6 +7,7 @@ private import semmle.go.dataflow.FunctionInputsAndOutputs private import semmle.go.dataflow.ExternalFlow private import DataFlowPrivate private import FlowSummaryImpl as FlowSummaryImpl +private import codeql.util.Unit import DataFlowNodes::Public /** @@ -50,6 +51,18 @@ abstract class FunctionModel extends Function { } } +/** + * A unit class for adding nodes that should implicitly read from all nested content. + * + * For example, this might be appopriate for the argument to a method that serializes a struct. + */ +class ImplicitFieldReadNode extends Unit { + /** + * Holds if the node `n` should implicitly read from all nested content in a taint-tracking context. + */ + abstract predicate shouldImplicitlyReadAllFields(DataFlow::Node n); +} + /** * Gets the `Node` corresponding to `insn`. */ diff --git a/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll index 5365228e2310..7f7f5af2df10 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll @@ -34,14 +34,6 @@ predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) { FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(src, sink, _) } -private Type getElementType(Type containerType) { - result = containerType.(ArrayType).getElementType() or - result = containerType.(SliceType).getElementType() or - result = containerType.(ChanType).getElementType() or - result = containerType.(MapType).getValueType() or - result = containerType.(PointerType).getPointerType() -} - /** * Holds if default `TaintTracking::Configuration`s should allow implicit reads * of `c` at sinks and inputs to additional taint steps. @@ -50,21 +42,9 @@ bindingset[node] predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { exists(Type containerType | node instanceof DataFlow::ArgumentNode and - getElementType*(node.getType()) = containerType + DataFlowPrivate::getElementType*(node.getType()) = containerType | - containerType instanceof ArrayType and - c instanceof DataFlow::ArrayContent - or - containerType instanceof SliceType and - c instanceof DataFlow::ArrayContent - or - containerType instanceof ChanType and - c instanceof DataFlow::CollectionContent - or - containerType instanceof MapType and - c instanceof DataFlow::MapValueContent - or - c.(DataFlow::PointerContent).getPointerType() = containerType + c = DataFlowPrivate::getContentForType(containerType) ) } diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll b/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll index 4ef4da058395..e67aef84f17c 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll @@ -67,4 +67,44 @@ module TextTemplate { input = inp and output = outp } } + + private class ExecuteTemplateMethod extends Method { + string name; + int inputArg; + + ExecuteTemplateMethod() { + this.hasQualifiedName("text/template", "Template", name) and + ( + name = "Execute" and inputArg = 1 + or + name = "ExecuteTemplate" and inputArg = 2 + ) + } + + int getInputArgIdx() { result = inputArg } + } + + private class ExecuteTemplateFieldReader extends DataFlow::ImplicitFieldReadNode { + override predicate shouldImplicitlyReadAllFields(DataFlow::Node n) { + exists(ExecuteTemplateMethod m, DataFlow::MethodCallNode cn | + cn.getACalleeIncludingExternals().asFunction() = m and + n = cn.getArgument(m.getInputArgIdx()) + ) + } + } + + private class ExecuteTemplateFunctionModels extends TaintTracking::FunctionModel, + ExecuteTemplateMethod + { + FunctionInput inp; + FunctionOutput outp; + + ExecuteTemplateFunctionModels() { + inp.isParameter(this.getInputArgIdx()) and outp.isParameter(0) + } + + override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { + input = inp and output = outp + } + } } diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.expected b/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql b/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql new file mode 100644 index 000000000000..171f2a1181b7 --- /dev/null +++ b/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql @@ -0,0 +1,9 @@ +import go +import TestUtilities.InlineFlowTest + +string getArgString(DataFlow::Node src, DataFlow::Node sink) { + exists(sink) and + result = src.(DataFlow::CallNode).getArgument(0).getExactValue() +} + +import TaintFlowTestArgString diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go new file mode 100644 index 000000000000..85a1372752ff --- /dev/null +++ b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go @@ -0,0 +1,50 @@ +package xyz + +import ( + "bytes" + "text/template" +) + +type Inner1 struct { + Data string +} + +type Inner2 struct { + Data string +} + +type Inner3 struct { + Data string +} + +type Outer struct { + SliceField []Inner1 + PtrField *Inner2 + MapField map[string]Inner3 +} + +func source(n int) string { return "dummy" } +func sink(arg any) {} + +func test() { + + source1 := source(1) + source2 := source(2) + source3 := source(3) + + toSerialize := Outer{[]Inner1{{source1}}, &Inner2{source2}, map[string]Inner3{"key": {source3}}} + buff1 := new(bytes.Buffer) + buff2 := new(bytes.Buffer) + bytes1 := make([]byte, 10) + bytes2 := make([]byte, 10) + + tmpl, _ := template.New("test").Parse("Template text goes here (irrelevant for test)") + tmpl.ExecuteTemplate(buff1, "test", toSerialize) + buff1.Read(bytes1) + sink(bytes1) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 + + tmpl.Execute(buff2, toSerialize) + buff2.Read(bytes2) + sink(bytes2) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 + +} From 5548662a743a6c9079307605defaacb2e30c9fed Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 18 Oct 2024 17:36:17 +0100 Subject: [PATCH 2/8] Switch to implementation using a universal read-only ContentSet --- .../go/dataflow/internal/DataFlowPrivate.qll | 113 +++++------------- .../go/dataflow/internal/DataFlowUtil.qll | 54 ++++++++- .../go/dataflow/internal/FlowSummaryImpl.qll | 42 +++---- .../dataflow/internal/TaintTrackingUtil.qll | 31 ++++- .../semmle/go/frameworks/stdlib/NetHttp.qll | 2 +- .../frameworks/serialization/texttemplate.go | 5 +- 6 files changed, 135 insertions(+), 112 deletions(-) diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll index 6fe005385b24..f9e569089bef 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll @@ -143,73 +143,28 @@ predicate jumpStep(Node n1, Node n2) { * Thus, `node2` references an object with a content `x` that contains the * value of `node1`. */ -predicate storeStep(Node node1, ContentSet c, Node node2) { - // a write `(*p).f = rhs` is modeled as two store steps: `rhs` is flows into field `f` of `(*p)`, - // which in turn flows into the pointer content of `p` - exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) | - node1 = rhs and - node2.(PostUpdateNode).getPreUpdateNode() = base and - c = any(DataFlow::FieldContent fc | fc.getField() = f) +predicate storeStep(Node node1, ContentSet cs, Node node2) { + exists(Content c | cs.asOneContent() = c | + // a write `(*p).f = rhs` is modeled as two store steps: `rhs` is flows into field `f` of `(*p)`, + // which in turn flows into the pointer content of `p` + exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) | + node1 = rhs and + node2.(PostUpdateNode).getPreUpdateNode() = base and + c = any(DataFlow::FieldContent fc | fc.getField() = f) + or + node1 = base and + node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and + c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType()) + ) or - node1 = base and - node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and + node1 = node2.(AddressOperationNode).getOperand() and c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType()) + or + containerStoreStep(node1, node2, c) ) or - node1 = node2.(AddressOperationNode).getOperand() and - c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType()) - or - FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c, + FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), cs, node2.(FlowSummaryNode).getSummaryNode()) - or - containerStoreStep(node1, node2, c) -} - -/** - * Gets a `DataFlow::ContentSet` containing a single `Content` appropriate - * for reading a field, element, map value or channel message of type `containerType`. - */ -DataFlow::ContentSet getContentForType(Type containerType) { - containerType instanceof ArrayType and - result instanceof DataFlow::ArrayContent - or - containerType instanceof SliceType and - result instanceof DataFlow::ArrayContent - or - containerType instanceof ChanType and - result instanceof DataFlow::CollectionContent - or - containerType instanceof MapType and - result instanceof DataFlow::MapValueContent - or - result.(DataFlow::PointerContent).getPointerType() = containerType - or - exists(Field f | f = containerType.(StructType).getField(_) | - result.(DataFlow::FieldContent).getField() = f - ) -} - -/** - * Gets the type of an array/slice element, channel value, map value, - * pointer base type or named-type underlying type relating to `containerType`. - */ -Type getElementType(Type containerType) { - result = containerType.(ArrayType).getElementType() or - result = containerType.(SliceType).getElementType() or - result = containerType.(ChanType).getElementType() or - result = containerType.(MapType).getValueType() or - result = containerType.(PointerType).getPointerType() or - result = containerType.(NamedType).getUnderlyingType() -} - -/** - * Gets the type of an array/slice element, channel value, map value, - * pointer base type, named-type underlying type or struct field type - * relating to `containerType`. - */ -Type getAnElementOrFieldType(Type containerType) { - result = getElementType(containerType) or - result = containerType.(StructType).getField(_).getType() } /** @@ -217,28 +172,26 @@ Type getAnElementOrFieldType(Type containerType) { * Thus, `node1` references an object with a content `c` whose value ends up in * `node2`. */ -predicate readStep(Node node1, ContentSet c, Node node2) { - node1 = node2.(PointerDereferenceNode).getOperand() and - c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType()) - or - exists(FieldReadNode read | - node2 = read and - node1 = read.getBase() and - c = any(DataFlow::FieldContent fc | fc.getField() = read.getField()) +predicate readStep(Node node1, ContentSet cs, Node node2) { + exists(Content c | cs.asOneContent() = c | + node1 = node2.(PointerDereferenceNode).getOperand() and + c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType()) + or + exists(FieldReadNode read | + node2 = read and + node1 = read.getBase() and + c = any(DataFlow::FieldContent fc | fc.getField() = read.getField()) + ) + or + containerReadStep(node1, node2, c) ) or - FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c, + FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), cs, node2.(FlowSummaryNode).getSummaryNode()) or - containerReadStep(node1, node2, c) - or - exists(Type containerType | - any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and - getAnElementOrFieldType*(node1.getType()) = containerType - | - c = getContentForType(containerType) and - node1 = node2 - ) + any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and + cs.isUniversalContent() and + node1 = node2 } /** diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll index 3d66f4b96e07..c748a1c138aa 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -182,6 +182,13 @@ class Content extends TContent { ) { filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0 } + + /** + * Gets the `ContentSet` contaning only this content. + */ + ContentSet asContentSet() { + result.asOneContent() = this + } } /** A reference through a field. */ @@ -249,21 +256,35 @@ class SyntheticFieldContent extends Content, TSyntheticFieldContent { override string toString() { result = s.toString() } } +private newtype TContentSet = + TOneContent(Content c) or + TAllContent() + /** * An entity that represents a set of `Content`s. * * The set may be interpreted differently depending on whether it is * stored into (`getAStoreContent`) or read from (`getAReadContent`). */ -class ContentSet instanceof Content { +class ContentSet instanceof TContentSet { /** Gets a content that may be stored into when storing into this set. */ - Content getAStoreContent() { result = this } + Content getAStoreContent() { + this = TOneContent(result) + } /** Gets a content that may be read from when reading from this set. */ - Content getAReadContent() { result = this } + Content getAReadContent() { + this = TOneContent(result) + or + this = TAllContent() and result = any(Content c) + } /** Gets a textual representation of this content set. */ - string toString() { result = super.toString() } + string toString() { + exists(Content c | this = TOneContent(c) | result = c.toString()) + or + this = TAllContent() and result = "all content" + } /** * Holds if this element is at the specified location. @@ -275,7 +296,30 @@ class ContentSet instanceof Content { predicate hasLocationInfo( string filepath, int startline, int startcolumn, int endline, int endcolumn ) { - super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + exists(Content c | this = TOneContent(c) | + c.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) + or + this = TAllContent() and + filepath = "" and + startline = 0 and + startcolumn = 0 and + endline = 0 and + endcolumn = 0 + } + + /** + * If this is a singleton content set, returns the content. + */ + Content asOneContent() { + this = TOneContent(result) + } + + /** + * Holds if this is a universal content set. + */ + predicate isUniversalContent() { + this = TAllContent() } } diff --git a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll index 55de73895938..c592b9f55a76 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll @@ -61,26 +61,28 @@ module Input implements InputSig { } string encodeContent(ContentSet cs, string arg) { - exists(Field f, string package, string className, string fieldName | - f = cs.(FieldContent).getField() and - f.hasQualifiedName(package, className, fieldName) and - result = "Field" and - arg = package + "." + className + "." + fieldName - ) - or - exists(SyntheticField f | - f = cs.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f + exists(Content c | cs.asOneContent() = c | + exists(Field f, string package, string className, string fieldName | + f = c.(FieldContent).getField() and + f.hasQualifiedName(package, className, fieldName) and + result = "Field" and + arg = package + "." + className + "." + fieldName + ) + or + exists(SyntheticField f | + f = c.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f + ) + or + c instanceof ArrayContent and result = "ArrayElement" and arg = "" + or + c instanceof CollectionContent and result = "Element" and arg = "" + or + c instanceof MapKeyContent and result = "MapKey" and arg = "" + or + c instanceof MapValueContent and result = "MapValue" and arg = "" + or + c instanceof PointerContent and result = "Dereference" and arg = "" ) - or - cs instanceof ArrayContent and result = "ArrayElement" and arg = "" - or - cs instanceof CollectionContent and result = "Element" and arg = "" - or - cs instanceof MapKeyContent and result = "MapKey" and arg = "" - or - cs instanceof MapValueContent and result = "MapValue" and arg = "" - or - cs instanceof PointerContent and result = "Dereference" and arg = "" } bindingset[token] @@ -523,7 +525,7 @@ module Private { SummaryComponent qualifier() { result = argument(-1) } /** Gets a summary component for field `f`. */ - SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) } + SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f).asContentSet()) } /** Gets a summary component that represents the return value of a call. */ SummaryComponent return() { result = SC::return(_) } diff --git a/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll index 7f7f5af2df10..c69a7f32e63b 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll @@ -34,17 +34,38 @@ predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) { FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(src, sink, _) } +private Type getElementType(Type containerType) { + result = containerType.(ArrayType).getElementType() or + result = containerType.(SliceType).getElementType() or + result = containerType.(ChanType).getElementType() or + result = containerType.(MapType).getValueType() or + result = containerType.(PointerType).getPointerType() +} + /** * Holds if default `TaintTracking::Configuration`s should allow implicit reads * of `c` at sinks and inputs to additional taint steps. */ bindingset[node] -predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { - exists(Type containerType | +predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet cs) { + exists(Type containerType, DataFlow::Content c | node instanceof DataFlow::ArgumentNode and - DataFlowPrivate::getElementType*(node.getType()) = containerType + getElementType*(node.getType()) = containerType and + cs.asOneContent() = c | - c = DataFlowPrivate::getContentForType(containerType) + containerType instanceof ArrayType and + c instanceof DataFlow::ArrayContent + or + containerType instanceof SliceType and + c instanceof DataFlow::ArrayContent + or + containerType instanceof ChanType and + c instanceof DataFlow::CollectionContent + or + containerType instanceof MapType and + c instanceof DataFlow::MapValueContent + or + c.(DataFlow::PointerContent).getPointerType() = containerType ) } @@ -122,7 +143,7 @@ predicate elementWriteStep(DataFlow::Node pred, DataFlow::Node succ) { any(DataFlow::Write w).writesElement(succ.(DataFlow::PostUpdateNode).getPreUpdateNode(), _, pred) or FlowSummaryImpl::Private::Steps::summaryStoreStep(pred.(DataFlowPrivate::FlowSummaryNode) - .getSummaryNode(), any(DataFlow::Content c | c instanceof DataFlow::ArrayContent), + .getSummaryNode(), any(DataFlow::ArrayContent ac).asContentSet(), succ.(DataFlowPrivate::FlowSummaryNode).getSummaryNode()) } diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll index deb366b4cdaa..dded5092bcc3 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll @@ -122,7 +122,7 @@ module NetHttp { | lastParamIndex = call.getCall().getCalleeType().getNumParameter() - 1 and varArgsSliceArgument = SummaryComponentStack::argument(lastParamIndex) and - arrayContentSC = SummaryComponent::content(arrayContent) and + arrayContentSC = SummaryComponent::content(arrayContent.asContentSet()) and stack = SummaryComponentStack::push(arrayContentSC, varArgsSliceArgument) ) else stack = SummaryComponentStack::argument(n) diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go index 85a1372752ff..3c94020c60d4 100644 --- a/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go +++ b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go @@ -43,7 +43,10 @@ func test() { buff1.Read(bytes1) sink(bytes1) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 - tmpl.Execute(buff2, toSerialize) + // Read `buff2` via an `any`-typed variable, to ensure the static type of the argument to tmpl.Execute makes no difference to the result + var toSerializeAsAny any + toSerializeAsAny = toSerialize + tmpl.Execute(buff2, toSerializeAsAny) buff2.Read(bytes2) sink(bytes2) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 From 9c409f128039127ccc0e6126f9efd2d6cdafdfd3 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 16 Dec 2024 14:04:27 +0000 Subject: [PATCH 3/8] Apply cosmetic review feedback --- .../semmle/go/dataflow/internal/DataFlowUtil.qll | 4 ++-- .../semmle/go/frameworks/stdlib/TextTemplate.qll | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll index c748a1c138aa..b9cdeeb49a05 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -54,7 +54,7 @@ abstract class FunctionModel extends Function { /** * A unit class for adding nodes that should implicitly read from all nested content. * - * For example, this might be appopriate for the argument to a method that serializes a struct. + * For example, this might be appropriate for the argument to a method that serializes a struct. */ class ImplicitFieldReadNode extends Unit { /** @@ -276,7 +276,7 @@ class ContentSet instanceof TContentSet { Content getAReadContent() { this = TOneContent(result) or - this = TAllContent() and result = any(Content c) + this = TAllContent() and exists(result) } /** Gets a textual representation of this content set. */ diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll b/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll index e67aef84f17c..d97dc6eee7aa 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll @@ -69,15 +69,16 @@ module TextTemplate { } private class ExecuteTemplateMethod extends Method { - string name; int inputArg; ExecuteTemplateMethod() { - this.hasQualifiedName("text/template", "Template", name) and - ( - name = "Execute" and inputArg = 1 - or - name = "ExecuteTemplate" and inputArg = 2 + exists(string name | + this.hasQualifiedName("text/template", "Template", name) and + ( + name = "Execute" and inputArg = 1 + or + name = "ExecuteTemplate" and inputArg = 2 + ) ) } From 3573ff10c75601693b891d3f17b436358c0e29f3 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 16 Dec 2024 14:21:15 +0000 Subject: [PATCH 4/8] Update to account for changes on main --- .../library-tests/semmle/go/frameworks/serialization/test.ql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql b/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql index 171f2a1181b7..94c1b8f30b54 100644 --- a/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql +++ b/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql @@ -1,5 +1,5 @@ import go -import TestUtilities.InlineFlowTest +import utils.test.InlineFlowTest string getArgString(DataFlow::Node src, DataFlow::Node sink) { exists(sink) and From bf34860ad09910864ae9865c460f9347d8d4bb94 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 16 Dec 2024 14:21:25 +0000 Subject: [PATCH 5/8] Test deeply ntested taint --- .../go/frameworks/serialization/texttemplate.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go index 3c94020c60d4..8068a314dc55 100644 --- a/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go +++ b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go @@ -17,10 +17,15 @@ type Inner3 struct { Data string } +type HasInner3Slice struct { + Slice []Inner3 +} + type Outer struct { SliceField []Inner1 PtrField *Inner2 MapField map[string]Inner3 + DeepField HasInner3Slice } func source(n int) string { return "dummy" } @@ -31,8 +36,10 @@ func test() { source1 := source(1) source2 := source(2) source3 := source(3) + source4 := source(4) - toSerialize := Outer{[]Inner1{{source1}}, &Inner2{source2}, map[string]Inner3{"key": {source3}}} + toSerialize := Outer{[]Inner1{{source1}}, &Inner2{source2}, map[string]Inner3{"key": {source3}}, + HasInner3Slice{[]Inner3{{source4}}}} buff1 := new(bytes.Buffer) buff2 := new(bytes.Buffer) bytes1 := make([]byte, 10) @@ -41,13 +48,13 @@ func test() { tmpl, _ := template.New("test").Parse("Template text goes here (irrelevant for test)") tmpl.ExecuteTemplate(buff1, "test", toSerialize) buff1.Read(bytes1) - sink(bytes1) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 + sink(bytes1) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 hasTaintFlow=4 // Read `buff2` via an `any`-typed variable, to ensure the static type of the argument to tmpl.Execute makes no difference to the result var toSerializeAsAny any toSerializeAsAny = toSerialize tmpl.Execute(buff2, toSerializeAsAny) buff2.Read(bytes2) - sink(bytes2) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 + sink(bytes2) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 hasTaintFlow=4 } From 24eb7749212b3070c9cd2429b28e801c40e186fa Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 16 Dec 2024 14:30:33 +0000 Subject: [PATCH 6/8] Change note --- go/ql/lib/change-notes/2024-12-16-any-content-readers.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 go/ql/lib/change-notes/2024-12-16-any-content-readers.md diff --git a/go/ql/lib/change-notes/2024-12-16-any-content-readers.md b/go/ql/lib/change-notes/2024-12-16-any-content-readers.md new file mode 100644 index 000000000000..aa244c1b97af --- /dev/null +++ b/go/ql/lib/change-notes/2024-12-16-any-content-readers.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- +* By implementing `ImplicitFieldReadNode` it is now possible to declare a dataflow node that reads any content (fields, array members, map keys and values). For example, this is appropriate for modelling a serialization method that flattens a potentially deep data structure into a string or byte array. +* The `Template.Execute[Template]` methods of the `text/template` package now correctly convey taint from any nested fields to their result. This may produce more results from any taint-tracking query when the `text/template` package is in use. From 016bda04a53248b4aeed0fce4dcec15ab6aa0a50 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 16 Dec 2024 14:31:34 +0000 Subject: [PATCH 7/8] Autoformat --- .../semmle/go/dataflow/internal/DataFlowUtil.qll | 16 ++++------------ .../go/dataflow/internal/FlowSummaryImpl.qll | 4 +++- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll index b9cdeeb49a05..b6ca2c825ebb 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -186,9 +186,7 @@ class Content extends TContent { /** * Gets the `ContentSet` contaning only this content. */ - ContentSet asContentSet() { - result.asOneContent() = this - } + ContentSet asContentSet() { result.asOneContent() = this } } /** A reference through a field. */ @@ -268,9 +266,7 @@ private newtype TContentSet = */ class ContentSet instanceof TContentSet { /** Gets a content that may be stored into when storing into this set. */ - Content getAStoreContent() { - this = TOneContent(result) - } + Content getAStoreContent() { this = TOneContent(result) } /** Gets a content that may be read from when reading from this set. */ Content getAReadContent() { @@ -311,16 +307,12 @@ class ContentSet instanceof TContentSet { /** * If this is a singleton content set, returns the content. */ - Content asOneContent() { - this = TOneContent(result) - } + Content asOneContent() { this = TOneContent(result) } /** * Holds if this is a universal content set. */ - predicate isUniversalContent() { - this = TAllContent() - } + predicate isUniversalContent() { this = TAllContent() } } /** diff --git a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll index c592b9f55a76..62c19e1565d9 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll @@ -525,7 +525,9 @@ module Private { SummaryComponent qualifier() { result = argument(-1) } /** Gets a summary component for field `f`. */ - SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f).asContentSet()) } + SummaryComponent field(Field f) { + result = content(any(FieldContent c | c.getField() = f).asContentSet()) + } /** Gets a summary component that represents the return value of a call. */ SummaryComponent return() { result = SC::return(_) } From 9504f3611fab6c0fb9b134cf4defd1587deb45bc Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 19 Dec 2024 14:49:15 +0000 Subject: [PATCH 8/8] Restrict text/template modelling to known call targets Otherwise it's too easy to define a common interface to both text/template, which doesn't sanitize, and html/template, which does. --- go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll b/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll index d97dc6eee7aa..c5a67f388f42 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll @@ -88,7 +88,7 @@ module TextTemplate { private class ExecuteTemplateFieldReader extends DataFlow::ImplicitFieldReadNode { override predicate shouldImplicitlyReadAllFields(DataFlow::Node n) { exists(ExecuteTemplateMethod m, DataFlow::MethodCallNode cn | - cn.getACalleeIncludingExternals().asFunction() = m and + cn.getTarget() = m and n = cn.getArgument(m.getInputArgIdx()) ) }