From 77958849468c2946496e300343c4a9fca310fd01 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Jun 2026 05:30:20 +0000 Subject: [PATCH 1/8] Initial plan From a4585d8d948ea028eb91335536458c0e5bf50727 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Jun 2026 05:48:40 +0000 Subject: [PATCH 2/8] Add test documenting missing PEP249 alerts for connection stored in self attribute --- .../library-tests/frameworks/hdbcli/pep249.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py index 713f15cb6d4f..f317a4958169 100644 --- a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py +++ b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py @@ -7,3 +7,31 @@ cursor.executemany("some sql", (42,)) # $ getSql="some sql" cursor.close() + + +# Connection stored in a class attribute (`self._conn`) and used in another method. +# +# This is currently NOT detected: the `Connection::instance()`/`execute()` predicates in +# PEP249.qll are based on type tracking, which cannot follow a value that is stored into a +# `self` attribute in one method and read from a `self` attribute in another method (see the +# `MISSING` markers below). Regular (global) data flow handles this case correctly, so the +# limitation is specific to the type-tracking-based modeling. +class Database: + def __init__(self): + self._conn = dbapi.connect(address="hostname", port=300, user="username") + + def get_connection(self): + return self._conn + + def run_via_getter(self): + conn = self.get_connection() + cursor = conn.cursor() + cursor.execute("getter sql") # $ MISSING: getSql="getter sql" + + def run_direct(self): + self._conn.execute("direct sql") # $ MISSING: getSql="direct sql" + + +db = Database() +db.run_via_getter() +db.run_direct() From 73bc2d70ae1067b8bc0ee0d8f55c7e9a0c38326e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 11 Jun 2026 06:08:49 +0000 Subject: [PATCH 3/8] Model instance-attribute type flow Use a field level step like JS and Ruby. --- .../new/internal/TypeTrackingImpl.qll | 47 +++++++++++++++++++ .../dataflow/typetracking/attribute_tests.py | 4 +- .../library-tests/frameworks/hdbcli/pep249.py | 13 +++-- 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 215c7906e655..a242c1d8e50c 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -172,6 +172,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) { TypeTrackerSummaryFlow::levelStepNoCall(nodeFrom, nodeTo) + or + localFieldStep(nodeFrom, nodeTo) } /** @@ -317,6 +319,51 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `ref` accesses attribute `attr` of `self`, where `self` is the first + * parameter of an instance method of `cls` (i.e. an access of the form `self.attr`). + * + * Static methods and class methods are excluded, since their first parameter is not a + * `self` instance reference. + */ + private predicate selfAttrRef(Class cls, string attr, DataFlowPublic::AttrRef ref) { + exists(Function method, Name selfUse | + method = cls.getAMethod() and + not DataFlowDispatch::isStaticmethod(method) and + not DataFlowDispatch::isClassmethod(method) and + selfUse.getVariable() = method.getArg(0).(Name).getVariable() and + ref.getObject().asCfgNode().getNode() = selfUse and + ref.mayHaveAttributeName(attr) + ) + } + + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a + * class, and `nodeTo` reads attribute `self.attr` in some (possibly different) instance + * method of the same class. + * + * This models flow through instance attributes (`self.foo`): a value stored into + * `self.foo` in one method can be read from `self.foo` in another method. Type-tracking + * handles the store and read steps via `AttrWrite`/`AttrRead`, but on its own it cannot + * relate the `self` of the writing method to the `self` of the reading method. Following + * the approach used for Ruby and JavaScript, we model this directly as a level step from + * the written value to the read reference, for any pair of methods on the class (not + * just from `__init__`). + * + * This is an over-approximation: it is instance-insensitive (it does not distinguish + * between different instances of the same class) and order-insensitive (it does not + * require the write to happen before the read), matching the precision of + * instance-attribute handling for Ruby and JavaScript. + */ + private predicate localFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read | + selfAttrRef(cls, attr, write) and + nodeFrom = write.getValue() and + selfAttrRef(cls, attr, read) and + nodeTo = read + ) + } + /** * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. */ diff --git a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py index c49cdf77fcdf..05496ad74d09 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py +++ b/python/ql/test/library-tests/dataflow/typetracking/attribute_tests.py @@ -151,10 +151,10 @@ def __init__(self): # $ tracked=foo self.foo = tracked # $ tracked=foo tracked def print_foo(self): # $ MISSING: tracked=foo - print(self.foo) # $ MISSING: tracked=foo tracked + print(self.foo) # $ tracked MISSING: tracked=foo def possibly_uncalled_method(self): # $ MISSING: tracked=foo - print(self.foo) # $ MISSING: tracked=foo tracked + print(self.foo) # $ tracked MISSING: tracked=foo instance = MyClass2() print(instance.foo) # $ MISSING: tracked=foo tracked diff --git a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py index f317a4958169..0c6c39086482 100644 --- a/python/ql/test/library-tests/frameworks/hdbcli/pep249.py +++ b/python/ql/test/library-tests/frameworks/hdbcli/pep249.py @@ -11,11 +11,10 @@ # Connection stored in a class attribute (`self._conn`) and used in another method. # -# This is currently NOT detected: the `Connection::instance()`/`execute()` predicates in -# PEP249.qll are based on type tracking, which cannot follow a value that is stored into a -# `self` attribute in one method and read from a `self` attribute in another method (see the -# `MISSING` markers below). Regular (global) data flow handles this case correctly, so the -# limitation is specific to the type-tracking-based modeling. +# This is detected because type tracking includes a level step modelling flow through +# instance attributes: a value written to `self._conn` in one method (here `__init__`) can +# be read back from `self._conn` (directly or via a getter) in any other method on the same +# class. This follows the same approach used for instance fields in Ruby and JavaScript. class Database: def __init__(self): self._conn = dbapi.connect(address="hostname", port=300, user="username") @@ -26,10 +25,10 @@ def get_connection(self): def run_via_getter(self): conn = self.get_connection() cursor = conn.cursor() - cursor.execute("getter sql") # $ MISSING: getSql="getter sql" + cursor.execute("getter sql") # $ getSql="getter sql" def run_direct(self): - self._conn.execute("direct sql") # $ MISSING: getSql="direct sql" + self._conn.execute("direct sql") # $ getSql="direct sql" db = Database() From befb557bfd8b0c3d1800177c155a580765abbe22 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Thu, 11 Jun 2026 15:44:20 +0200 Subject: [PATCH 4/8] Accept fixed MISSING tests --- .../library-tests/CallGraph/InlineCallGraphTest.expected | 3 --- .../library-tests/CallGraph/code/class_attr_assign.py | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index b353309e852f..1cd62c6de347 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -1,9 +1,6 @@ testFailures debug_callableNotUnique pointsTo_found_typeTracker_notFound -| code/class_attr_assign.py:10:9:10:27 | ControlFlowNode for Attribute() | my_func | -| code/class_attr_assign.py:11:9:11:25 | ControlFlowNode for Attribute() | my_func | -| code/class_attr_assign.py:26:9:26:25 | ControlFlowNode for Attribute() | DummyObject.method | | code/class_super.py:50:1:50:6 | ControlFlowNode for Attribute() | outside_def | | code/conditional_in_argument.py:18:5:18:11 | ControlFlowNode for Attribute() | X.bar | | code/func_defined_outside_class.py:21:1:21:11 | ControlFlowNode for Attribute() | A.foo | diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py index 605375925f72..714e27dba1a0 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/class_attr_assign.py @@ -7,8 +7,8 @@ def __init__(self, func): self.direct_ref = my_func def later(self): - self.indirect_ref() # $ pt=my_func MISSING: tt=my_func - self.direct_ref() # $ pt=my_func MISSING: tt=my_func + self.indirect_ref() # $ pt=my_func tt=my_func + self.direct_ref() # $ pt=my_func tt=my_func foo = Foo(my_func) # $ tt=Foo.__init__ foo.later() # $ pt,tt=Foo.later @@ -23,7 +23,7 @@ def __init__(self): self.obj = DummyObject() def later(self): - self.obj.method() # $ pt=DummyObject.method MISSING: tt=DummyObject.method + self.obj.method() # $ pt=DummyObject.method tt=DummyObject.method bar = Bar(my_func) # $ tt=Bar.__init__ From 913dcb119059b42918661e644333e5052f7f6a6b Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Thu, 11 Jun 2026 22:20:52 +0200 Subject: [PATCH 5/8] Add change note --- .../2026-06-11-fix-type-tracking-instance-attributes.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md diff --git a/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md new file mode 100644 index 000000000000..25bc0e0f31f9 --- /dev/null +++ b/python/ql/lib/change-notes/2026-06-11-fix-type-tracking-instance-attributes.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Python type tracking now follows values stored in instance attributes such as `self.attr` across instance methods on the same class. As a result, analysis is more likely to recognize user-defined objects that are stored on `self` and used later in other methods, which may produce additional results. From d389ea4039e28f1876b2ea962f210d3cfaecae7a Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Fri, 12 Jun 2026 13:28:00 +0100 Subject: [PATCH 6/8] Convert sql-injection test to inline expectations --- .../SqlInjection.expected | 34 +++++++++---------- .../CWE-089-SqlInjection/SqlInjection.qlref | 3 +- .../CWE-089-SqlInjection/sql_injection.py | 10 +++--- .../sqlalchemy_textclause.py | 28 +++++++-------- 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected index 9ff8b1d718c1..c1958c23858d 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected @@ -1,3 +1,20 @@ +#select +| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | +| sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | edges | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | provenance | | @@ -35,20 +52,3 @@ nodes | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | subpaths -#select -| sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:26:28:26:85 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:27:28:27:87 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:31:50:31:72 | ControlFlowNode for Attribute() | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:41:26:41:33 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:42:31:42:38 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:43:30:43:37 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:44:35:44:42 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:45:41:45:48 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:46:46:46:53 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:47:47:47:54 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:48:52:48:59 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | -| sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref index d1d02cbe8d37..444c0e5f46aa 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.qlref @@ -1 +1,2 @@ -Security/CWE-089/SqlInjection.ql +query: Security/CWE-089/SqlInjection.ql +postprocess: utils/test/InlineExpectationsTestQuery.ql diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py index c79bee16cb21..52aa3169616e 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sql_injection.py @@ -11,19 +11,19 @@ class User(models.Model): pass @app.route("/users/") -def show_user(username): +def show_user(username): # $ Source with connection.cursor() as cursor: # GOOD -- Using parameters cursor.execute("SELECT * FROM users WHERE username = %s", username) User.objects.raw("SELECT * FROM users WHERE username = %s", (username,)) # BAD -- Using string formatting - cursor.execute("SELECT * FROM users WHERE username = '%s'" % username) + cursor.execute("SELECT * FROM users WHERE username = '%s'" % username) # $ Alert # BAD -- other ways of executing raw SQL code with string interpolation - User.objects.annotate(RawSQL("insert into names_file ('name') values ('%s')" % username)) - User.objects.raw("insert into names_file ('name') values ('%s')" % username) - User.objects.extra("insert into names_file ('name') values ('%s')" % username) + User.objects.annotate(RawSQL("insert into names_file ('name') values ('%s')" % username)) # $ Alert + User.objects.raw("insert into names_file ('name') values ('%s')" % username) # $ Alert + User.objects.extra("insert into names_file ('name') values ('%s')" % username) # $ Alert # BAD (but currently no custom query to find this) # diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py index a54d64517d42..f35b1325366c 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/sqlalchemy_textclause.py @@ -20,15 +20,15 @@ class User(Base): @app.route("/users/") -def show_user(username): +def show_user(username): # $ Source session = sqlalchemy.orm.Session(engine) # BAD, normal SQL injection - stmt = sqlalchemy.text("SELECT * FROM users WHERE username = '{}'".format(username)) + stmt = sqlalchemy.text("SELECT * FROM users WHERE username = '{}'".format(username)) # $ Alert results = session.execute(stmt).fetchall() # BAD, allows SQL injection - username_formatted_for_sql = sqlalchemy.text("'{}'".format(username)) + username_formatted_for_sql = sqlalchemy.text("'{}'".format(username)) # $ Alert stmt = sqlalchemy.select(User).where(User.username == username_formatted_for_sql) results = session.execute(stmt).scalars().all() @@ -38,14 +38,14 @@ def show_user(username): # All of these should be flagged by query - t1 = sqlalchemy.text(username) - t2 = sqlalchemy.text(text=username) - t3 = sqlalchemy.sql.text(username) - t4 = sqlalchemy.sql.text(text=username) - t5 = sqlalchemy.sql.expression.text(username) - t6 = sqlalchemy.sql.expression.text(text=username) - t7 = sqlalchemy.sql.expression.TextClause(username) - t8 = sqlalchemy.sql.expression.TextClause(text=username) - - t9 = db.text(username) - t10 = db.text(text=username) + t1 = sqlalchemy.text(username) # $ Alert + t2 = sqlalchemy.text(text=username) # $ Alert + t3 = sqlalchemy.sql.text(username) # $ Alert + t4 = sqlalchemy.sql.text(text=username) # $ Alert + t5 = sqlalchemy.sql.expression.text(username) # $ Alert + t6 = sqlalchemy.sql.expression.text(text=username) # $ Alert + t7 = sqlalchemy.sql.expression.TextClause(username) # $ Alert + t8 = sqlalchemy.sql.expression.TextClause(text=username) # $ Alert + + t9 = db.text(username) # $ Alert + t10 = db.text(text=username) # $ Alert From 434a99447e087334c31a81d085ab42dcda283bb7 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Fri, 12 Jun 2026 13:29:27 +0100 Subject: [PATCH 7/8] Add thorough tests, including one MISSING alert --- .../Security/CWE-089-SqlInjection/app.py | 52 +++++++++++++++++++ .../CWE-089-SqlInjection/db_connection.py | 24 +++++++++ 2 files changed, 76 insertions(+) create mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py create mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py new file mode 100644 index 000000000000..8046f1ef52ed --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py @@ -0,0 +1,52 @@ +from fastapi import FastAPI +from hdbcli import dbapi +from db_connection import get_conn +from db_connection import hdb_con +from db_connection import hdb_con2 +from db_connection import hdb_con3 +app = FastAPI() + +class DatabaseConnection: + + def __init__(self): + self._conn = dbapi.connect(address='localhost', port=30015, user='system', password='Password123') + + def get_conn(self): + return self._conn + +db_connection = DatabaseConnection() + +@app.get("/unsafe1/") +async def unsafe(name: str): # $ Source + query = "select * from users where name=" + name + cursor = hdb_con.cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe2/") +async def unsafe2(name: str): # $ Source + query = "select * from users where name=" + name + cursor = hdb_con2.cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe3/") +async def unsafe3(name: str): # $ MISSING: Source + query = "select * from users where name=" + name + cursor = hdb_con3.cursor() + cursor.execute(query) # $ MISSING: Alert + cursor.close() + +@app.get("/unsafe4/") +async def unsafe4(name: str): # $ Source + query = "select * from users where name=" + name + cursor = get_conn().cursor() + cursor.execute(query) # $ Alert + cursor.close() + +@app.get("/unsafe5/") +async def unsafe5(name: str): # $ Source + query = "select * from users where name=" + name + cursor = db_connection.get_conn().cursor() + cursor.execute(query) # $ Alert + cursor.close() diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py new file mode 100644 index 000000000000..b05a43bdebba --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/db_connection.py @@ -0,0 +1,24 @@ +from hdbcli import dbapi +from typing import Optional + +hdb_con = dbapi.connect(address='localhost', port=30015, user='system', password='Password123') + + +class DatabaseConnection: + + def __init__(self): + self._conn = dbapi.connect(address='localhost', port=30015, user='system', password='Password123') + + def get_conn(self): + return self._conn + + +hdb_con2 = DatabaseConnection().get_conn() +hdb_con3 = DatabaseConnection()._conn + +_hana_connection: Optional[DatabaseConnection] = None +def get_conn(): + global _hana_connection + if _hana_connection is None: + _hana_connection = DatabaseConnection() + return _hana_connection.get_conn() From 3bad60cf53e126d0c8ed1da7c064df7cc6d86855 Mon Sep 17 00:00:00 2001 From: Owen Mansel-Chan Date: Fri, 12 Jun 2026 13:43:24 +0100 Subject: [PATCH 8/8] Fix MISSING alert --- .../new/internal/TypeTrackingImpl.qll | 40 ++++++++++++++++++- .../SqlInjection.expected | 30 ++++++++++++++ .../Security/CWE-089-SqlInjection/app.py | 4 +- 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index a242c1d8e50c..b76f8aeb9a57 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -167,7 +167,9 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */ - predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { none() } + predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { + instanceFieldStep(nodeFrom, nodeTo) + } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which does not depend on the call graph. */ predicate levelStepNoCall(Node nodeFrom, LocalSourceNode nodeTo) { @@ -337,6 +339,20 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `read` reads attribute `attr` from an instance of `cls`, where the instance + * is referred to from outside the methods of `cls` (i.e. an access of the form + * `instance.attr`, where `instance` is a reference to an instance of `cls`). + * + * This complements `selfAttrRef`, which only handles `self.attr` accesses inside the + * methods of `cls`. Unlike `selfAttrRef`, this depends on the call graph (via + * `classInstanceTracker`), so steps using it must be reported as `levelStepCall`. + */ + private predicate instanceAttrRead(Class cls, string attr, DataFlowPublic::AttrRead read) { + read.getObject() = DataFlowDispatch::classInstanceTracker(cls) and + read.mayHaveAttributeName(attr) + } + /** * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a * class, and `nodeTo` reads attribute `self.attr` in some (possibly different) instance @@ -364,6 +380,28 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { ) } + /** + * Holds if `nodeFrom` is written to attribute `self.attr` in some instance method of a + * class, and `nodeTo` reads attribute `attr` from an instance of the same class outside + * its methods (e.g. `instance.attr`). + * + * This is the cross-instance counterpart of `localFieldStep`: it relates a write of + * `self.attr` inside the class to a read of `attr` on a reference to an instance of the + * class. Identifying instances relies on the call graph (via `classInstanceTracker`), so + * this step is reported as `levelStepCall` rather than `levelStepNoCall`. + * + * Like `localFieldStep`, this is an over-approximation: it is both instance-insensitive + * and order-insensitive. + */ + private predicate instanceFieldStep(Node nodeFrom, LocalSourceNode nodeTo) { + exists(Class cls, string attr, DataFlowPublic::AttrWrite write, DataFlowPublic::AttrRead read | + selfAttrRef(cls, attr, write) and + nodeFrom = write.getValue() and + instanceAttrRead(cls, attr, read) and + nodeTo = read + ) + } + /** * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. */ diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected index c1958c23858d..8f60394d8a2b 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/SqlInjection.expected @@ -1,4 +1,9 @@ #select +| app.py:23:20:23:24 | ControlFlowNode for query | app.py:20:18:20:21 | ControlFlowNode for name | app.py:23:20:23:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:20:18:20:21 | ControlFlowNode for name | user-provided value | +| app.py:30:20:30:24 | ControlFlowNode for query | app.py:27:19:27:22 | ControlFlowNode for name | app.py:30:20:30:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:27:19:27:22 | ControlFlowNode for name | user-provided value | +| app.py:37:20:37:24 | ControlFlowNode for query | app.py:34:19:34:22 | ControlFlowNode for name | app.py:37:20:37:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:34:19:34:22 | ControlFlowNode for name | user-provided value | +| app.py:44:20:44:24 | ControlFlowNode for query | app.py:41:19:41:22 | ControlFlowNode for name | app.py:44:20:44:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:41:19:41:22 | ControlFlowNode for name | user-provided value | +| app.py:51:20:51:24 | ControlFlowNode for query | app.py:48:19:48:22 | ControlFlowNode for name | app.py:51:20:51:24 | ControlFlowNode for query | This SQL query depends on a $@. | app.py:48:19:48:22 | ControlFlowNode for name | user-provided value | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | This SQL query depends on a $@. | sql_injection.py:14:15:14:22 | ControlFlowNode for username | user-provided value | @@ -16,6 +21,16 @@ | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | This SQL query depends on a $@. | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | user-provided value | edges +| app.py:20:18:20:21 | ControlFlowNode for name | app.py:21:5:21:9 | ControlFlowNode for query | provenance | | +| app.py:21:5:21:9 | ControlFlowNode for query | app.py:23:20:23:24 | ControlFlowNode for query | provenance | | +| app.py:27:19:27:22 | ControlFlowNode for name | app.py:28:5:28:9 | ControlFlowNode for query | provenance | | +| app.py:28:5:28:9 | ControlFlowNode for query | app.py:30:20:30:24 | ControlFlowNode for query | provenance | | +| app.py:34:19:34:22 | ControlFlowNode for name | app.py:35:5:35:9 | ControlFlowNode for query | provenance | | +| app.py:35:5:35:9 | ControlFlowNode for query | app.py:37:20:37:24 | ControlFlowNode for query | provenance | | +| app.py:41:19:41:22 | ControlFlowNode for name | app.py:42:5:42:9 | ControlFlowNode for query | provenance | | +| app.py:42:5:42:9 | ControlFlowNode for query | app.py:44:20:44:24 | ControlFlowNode for query | provenance | | +| app.py:48:19:48:22 | ControlFlowNode for name | app.py:49:5:49:9 | ControlFlowNode for query | provenance | | +| app.py:49:5:49:9 | ControlFlowNode for query | app.py:51:20:51:24 | ControlFlowNode for query | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | provenance | | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | sql_injection.py:25:26:25:83 | ControlFlowNode for BinaryExpr | provenance | | @@ -33,6 +48,21 @@ edges | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:50:18:50:25 | ControlFlowNode for username | provenance | | | sqlalchemy_textclause.py:23:15:23:22 | ControlFlowNode for username | sqlalchemy_textclause.py:51:24:51:31 | ControlFlowNode for username | provenance | | nodes +| app.py:20:18:20:21 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:21:5:21:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:23:20:23:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:27:19:27:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:28:5:28:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:30:20:30:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:34:19:34:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:35:5:35:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:37:20:37:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:41:19:41:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:42:5:42:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:44:20:44:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:48:19:48:22 | ControlFlowNode for name | semmle.label | ControlFlowNode for name | +| app.py:49:5:49:9 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | +| app.py:51:20:51:24 | ControlFlowNode for query | semmle.label | ControlFlowNode for query | | sql_injection.py:14:15:14:22 | ControlFlowNode for username | semmle.label | ControlFlowNode for username | | sql_injection.py:21:24:21:77 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | | sql_injection.py:24:38:24:95 | ControlFlowNode for BinaryExpr | semmle.label | ControlFlowNode for BinaryExpr | diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py index 8046f1ef52ed..4de61346d8f5 100644 --- a/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection/app.py @@ -31,10 +31,10 @@ async def unsafe2(name: str): # $ Source cursor.close() @app.get("/unsafe3/") -async def unsafe3(name: str): # $ MISSING: Source +async def unsafe3(name: str): # $ Source query = "select * from users where name=" + name cursor = hdb_con3.cursor() - cursor.execute(query) # $ MISSING: Alert + cursor.execute(query) # $ Alert cursor.close() @app.get("/unsafe4/")