@random_walker
For years I've said that the capability-reliability gap is an under-appreciated limitation of AI agents. Finally, in a new paper led by @steverab, we defined and measured it! https://t.co/h95qwFe8Oe
Viewing enriched Twitter post
For years I've said that the capability-reliability gap is an under-appreciated limitation of AI agents. Finally, in a new paper led by @steverab, we defined and measured it! https://t.co/h95qwFe8Oe
{
"score": 0.4,
"score_components": {
"author": 0.09,
"engagement": 0.0,
"quality": 0.1,
"source": 0.135,
"nlp": 0.05,
"recency": 0.025
},
"scored_at": "2026-03-01T12:12:15.034585",
"import_source": "api_import",
"source_tagged_at": "2026-03-01T12:12:15.034612",
"enriched": true,
"enriched_at": "2026-03-01T12:12:15.034617"
} {
"type": "tweet",
"id": "2026384543700115870",
"url": "https://x.com/random_walker/status/2026384543700115870",
"twitterUrl": "https://twitter.com/random_walker/status/2026384543700115870",
"text": "For years I've said that the capability-reliability gap is an under-appreciated limitation of AI agents. Finally, in a new paper led by @steverab, we defined and measured it! https://t.co/h95qwFe8Oe",
"source": "Twitter for iPhone",
"retweetCount": 31,
"replyCount": 8,
"likeCount": 161,
"quoteCount": 3,
"viewCount": 31967,
"createdAt": "Tue Feb 24 19:51:38 +0000 2026",
"lang": "en",
"bookmarkCount": 139,
"isReply": false,
"inReplyToId": null,
"conversationId": "2026384543700115870",
"displayTextRange": [
0,
198
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "random_walker",
"url": "https://x.com/random_walker",
"twitterUrl": "https://twitter.com/random_walker",
"id": "10834752",
"name": "Arvind Narayanan",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1650881612756942850/bZYjMyFU_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/10834752/1488663432",
"description": "",
"location": "Princeton, NJ",
"followers": 126209,
"following": 519,
"status": "",
"canDm": false,
"canMediaTag": false,
"createdAt": "Tue Dec 04 11:14:14 +0000 2007",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 23473,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 912,
"statusesCount": 13041,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [
"2026316087604687193"
],
"profile_bio": {
"description": "Princeton CS prof and Director @PrincetonCITP. \nCoauthor of \"AI Snake Oil\" and \"AI as Normal Technology\". https://t.co/ZwebetjZ4n\nViews mine.",
"entities": {
"description": {
"hashtags": [],
"symbols": [],
"urls": [
{
"display_url": "normaltech.ai",
"expanded_url": "https://www.normaltech.ai/",
"indices": [
106,
129
],
"url": "https://t.co/ZwebetjZ4n"
}
],
"user_mentions": [
{
"id_str": "0",
"indices": [
31,
45
],
"name": "",
"screen_name": "PrincetonCITP"
}
]
},
"url": {
"urls": [
{
"display_url": "cs.princeton.edu/~arvindn/",
"expanded_url": "https://www.cs.princeton.edu/~arvindn/",
"indices": [
0,
23
],
"url": "https://t.co/px6fpS9QFq"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {},
"card": null,
"place": {},
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [
{
"display_url": "arxiv.org/pdf/2602.16666",
"expanded_url": "https://arxiv.org/pdf/2602.16666",
"indices": [
175,
198
],
"url": "https://t.co/h95qwFe8Oe"
}
],
"user_mentions": [
{
"id_str": "138821636",
"indices": [
136,
145
],
"name": "Stephan Rabanser",
"screen_name": "steverab"
}
]
},
"quoted_tweet": {
"type": "tweet",
"id": "2026316087604687193",
"url": "https://x.com/random_walker/status/2026316087604687193",
"twitterUrl": "https://twitter.com/random_walker/status/2026316087604687193",
"text": "https://t.co/16ak7tW7Z7",
"source": "Twitter for iPhone",
"retweetCount": 40,
"replyCount": 12,
"likeCount": 188,
"quoteCount": 15,
"viewCount": 83066,
"createdAt": "Tue Feb 24 15:19:37 +0000 2026",
"lang": "zxx",
"bookmarkCount": 249,
"isReply": false,
"inReplyToId": null,
"conversationId": "2026316087604687193",
"displayTextRange": [
0,
23
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "random_walker",
"url": "https://x.com/random_walker",
"twitterUrl": "https://twitter.com/random_walker",
"id": "10834752",
"name": "Arvind Narayanan",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1650881612756942850/bZYjMyFU_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/10834752/1488663432",
"description": "",
"location": "Princeton, NJ",
"followers": 126209,
"following": 519,
"status": "",
"canDm": false,
"canMediaTag": false,
"createdAt": "Tue Dec 04 11:14:14 +0000 2007",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 23473,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 912,
"statusesCount": 13041,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [
"2026316087604687193"
],
"profile_bio": {
"description": "Princeton CS prof and Director @PrincetonCITP. \nCoauthor of \"AI Snake Oil\" and \"AI as Normal Technology\". https://t.co/ZwebetjZ4n\nViews mine.",
"entities": {
"description": {
"hashtags": [],
"symbols": [],
"urls": [
{
"display_url": "normaltech.ai",
"expanded_url": "https://www.normaltech.ai/",
"indices": [
106,
129
],
"url": "https://t.co/ZwebetjZ4n"
}
],
"user_mentions": [
{
"id_str": "0",
"indices": [
31,
45
],
"name": "",
"screen_name": "PrincetonCITP"
}
]
},
"url": {
"urls": [
{
"display_url": "cs.princeton.edu/~arvindn/",
"expanded_url": "https://www.cs.princeton.edu/~arvindn/",
"indices": [
0,
23
],
"url": "https://t.co/px6fpS9QFq"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {},
"card": null,
"place": {},
"entities": {
"hashtags": [],
"symbols": [],
"timestamps": [],
"urls": [
{
"display_url": "x.com/i/article/2026…",
"expanded_url": "http://x.com/i/article/2026312913116360704",
"indices": [
0,
23
],
"url": "https://t.co/16ak7tW7Z7"
}
],
"user_mentions": []
},
"quoted_tweet": null,
"retweeted_tweet": null,
"isLimitedReply": false,
"article": {
"title": "New Paper: Towards a science of AI agent reliability ",
"preview_text": "Suppose you hear about a new AI agent for improving productivity — by making purchases, or writing code, or sending emails, or handling a customer on your behalf. Should you trust it? Can the agent do",
"cover_media_img_url": "https://pbs.twimg.com/media/HB7pbHKWQAA-AXq.jpg"
}
},
"retweeted_tweet": null,
"isLimitedReply": false,
"article": null
}