@trycua
Today we're announcing cua-bench: a framework for benchmarking, training data, and RL environments for computer-use AI agents. Why? Current agents show 10x variance across minor UI changes. Here's how we're fixing it.
Viewing enriched Twitter post
Today we're announcing cua-bench: a framework for benchmarking, training data, and RL environments for computer-use AI agents. Why? Current agents show 10x variance across minor UI changes. Here's how we're fixing it.
{
"media": [
{
"url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/2000972986090709370/media_0.mp4?",
"media_url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/2000972986090709370/media_0.mp4?",
"type": "video",
"filename": "media_0.mp4"
}
],
"processed_at": "2025-12-16T18:11:28.199232",
"pipeline_version": "2.0"
} {
"type": "tweet",
"id": "2000972986090709370",
"url": "https://x.com/trycua/status/2000972986090709370",
"twitterUrl": "https://twitter.com/trycua/status/2000972986090709370",
"text": "Today we're announcing cua-bench: a framework for benchmarking, training data, and RL environments for computer-use AI agents. Why? Current agents show 10x variance across minor UI changes. Here's how we're fixing it.",
"source": "Twitter for iPhone",
"retweetCount": 13,
"replyCount": 8,
"likeCount": 34,
"quoteCount": 4,
"viewCount": 1868,
"createdAt": "Tue Dec 16 16:55:10 +0000 2025",
"lang": "en",
"bookmarkCount": 6,
"isReply": false,
"inReplyToId": null,
"conversationId": "2000972986090709370",
"displayTextRange": [
0,
217
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "trycua",
"url": "https://x.com/trycua",
"twitterUrl": "https://twitter.com/trycua",
"id": "1883205292596359168",
"name": "Cua",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": "Business",
"profilePicture": "https://pbs.twimg.com/profile_images/2000278132834590726/WDOB_sTz_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/1883205292596359168/1765401186",
"description": "",
"location": "San Francisco, CA",
"followers": 3914,
"following": 1560,
"status": "",
"canDm": false,
"canMediaTag": true,
"createdAt": "Sat Jan 25 17:28:35 +0000 2025",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 2221,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 74,
"statusesCount": 608,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [
"2000972986090709370"
],
"profile_bio": {
"description": "Open-source infrastructure for Computer-Use Agents // YC X25",
"entities": {
"description": {},
"url": {
"urls": [
{
"display_url": "cua.ai",
"expanded_url": "https://cua.ai",
"indices": [
0,
23
],
"url": "https://t.co/EIf47c6rhH"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {
"media": [
{
"additional_media_info": {
"monetizable": false
},
"allow_download_status": {
"allow_download": true
},
"display_url": "pic.twitter.com/nD0JTFHWjJ",
"expanded_url": "https://twitter.com/trycua/status/2000972986090709370/video/1",
"ext_media_availability": {
"status": "Available"
},
"id_str": "2000972576449769489",
"indices": [
218,
241
],
"media_key": "13_2000972576449769489",
"media_results": {
"id": "QXBpTWVkaWFSZXN1bHRzOgwABAoAARvE4fToljARAAA=",
"result": {
"__typename": "ApiMedia",
"id": "QXBpTWVkaWE6DAAECgABG8Th9OiWMBEAAA==",
"media_key": "13_2000972576449769489"
}
},
"media_url_https": "https://pbs.twimg.com/amplify_video_thumb/2000972576449769489/img/94WIrv1XYN7lgL6Z.jpg",
"original_info": {
"focus_rects": [],
"height": 1080,
"width": 1620
},
"sizes": {
"large": {
"h": 1080,
"w": 1620
}
},
"type": "video",
"url": "https://t.co/nD0JTFHWjJ",
"video_info": {
"aspect_ratio": [
3,
2
],
"duration_millis": 122921,
"variants": [
{
"content_type": "application/x-mpegURL",
"url": "https://video.twimg.com/amplify_video/2000972576449769489/pl/KYkHKxAGStcD1HAc.m3u8?tag=21&v=3cd"
},
{
"bitrate": 256000,
"content_type": "video/mp4",
"url": "https://video.twimg.com/amplify_video/2000972576449769489/vid/avc1/404x270/va7_-Hw27QzRnuXd.mp4?tag=21"
},
{
"bitrate": 832000,
"content_type": "video/mp4",
"url": "https://video.twimg.com/amplify_video/2000972576449769489/vid/avc1/540x360/4znYngITHzp7uJTc.mp4?tag=21"
},
{
"bitrate": 2176000,
"content_type": "video/mp4",
"url": "https://video.twimg.com/amplify_video/2000972576449769489/vid/avc1/1080x720/z5lWsdbJIErx60NI.mp4?tag=21"
},
{
"bitrate": 10368000,
"content_type": "video/mp4",
"url": "https://video.twimg.com/amplify_video/2000972576449769489/vid/avc1/1620x1080/xhYZQmia6JTgIYhL.mp4?tag=21"
}
]
}
}
]
},
"card": null,
"place": {},
"entities": {},
"quoted_tweet": null,
"retweeted_tweet": null,
"isLimitedReply": false,
"article": null
}