@janleike
New alignment paper with one of the most interesting generalization findings I've seen so far: If your model learns to hack on coding tasks, this can lead to broad misalignment. https://t.co/OXiVLRazIB
Viewing enriched Twitter post
New alignment paper with one of the most interesting generalization findings I've seen so far: If your model learns to hack on coding tasks, this can lead to broad misalignment. https://t.co/OXiVLRazIB
{
"media": [
{
"type": "photo",
"url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1991955830040863011/media_0.jpg?",
"filename": "media_0.jpg"
}
],
"processed_at": "2025-11-27T20:17:17.208527",
"pipeline_version": "2.0"
} {
"type": "tweet",
"id": "1991955830040863011",
"url": "https://x.com/janleike/status/1991955830040863011",
"twitterUrl": "https://twitter.com/janleike/status/1991955830040863011",
"text": "New alignment paper with one of the most interesting generalization findings I've seen so far:\n\nIf your model learns to hack on coding tasks, this can lead to broad misalignment. https://t.co/OXiVLRazIB",
"source": "Twitter for iPhone",
"retweetCount": 42,
"replyCount": 27,
"likeCount": 588,
"quoteCount": 4,
"viewCount": 70820,
"createdAt": "Fri Nov 21 19:44:13 +0000 2025",
"lang": "en",
"bookmarkCount": 219,
"isReply": false,
"inReplyToId": null,
"conversationId": "1991955830040863011",
"displayTextRange": [
0,
178
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "janleike",
"url": "https://x.com/janleike",
"twitterUrl": "https://twitter.com/janleike",
"id": "710610891058716673",
"name": "Jan Leike",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1077523091700502528/2YCa_F4o_normal.jpg",
"coverPicture": "",
"description": "",
"location": "San Francisco, USA",
"followers": 117612,
"following": 332,
"status": "",
"canDm": true,
"canMediaTag": false,
"createdAt": "Thu Mar 17 23:36:53 +0000 2016",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 3645,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 40,
"statusesCount": 756,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [
"1795497960509448617"
],
"profile_bio": {
"description": "Alignment team lead @AnthropicAI. Previously OpenAI & DeepMind.\nOptimizing for a post-AGI future where humanity flourishes.\nOpinions aren't my employer's.",
"entities": {
"description": {
"user_mentions": [
{
"id_str": "0",
"indices": [
20,
32
],
"name": "",
"screen_name": "AnthropicAI"
}
]
},
"url": {
"urls": [
{
"display_url": "jan.leike.name",
"expanded_url": "https://jan.leike.name/",
"indices": [
0,
23
],
"url": "https://t.co/Uvp4pU8R0f"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {
"media": [
{
"display_url": "pic.twitter.com/OXiVLRazIB",
"expanded_url": "https://twitter.com/janleike/status/1991955830040863011/photo/1",
"ext_media_availability": {
"status": "Available"
},
"features": {
"large": {},
"orig": {}
},
"id_str": "1991953934035140608",
"indices": [
179,
202
],
"media_key": "3_1991953934035140608",
"media_results": {
"id": "QXBpTWVkaWFSZXN1bHRzOgwAAQoAARuk14yXG0AACgACG6TZRgnaoSMAAA==",
"result": {
"__typename": "ApiMedia",
"id": "QXBpTWVkaWE6DAABCgABG6TXjJcbQAAKAAIbpNlGCdqhIwAA",
"media_key": "3_1991953934035140608"
}
},
"media_url_https": "https://pbs.twimg.com/media/G6TXjJcbQAApSp0.jpg",
"original_info": {
"focus_rects": [
{
"h": 811,
"w": 1448,
"x": 0,
"y": 0
},
{
"h": 1380,
"w": 1380,
"x": 0,
"y": 0
},
{
"h": 1380,
"w": 1211,
"x": 0,
"y": 0
},
{
"h": 1380,
"w": 690,
"x": 53,
"y": 0
},
{
"h": 1380,
"w": 1448,
"x": 0,
"y": 0
}
],
"height": 1380,
"width": 1448
},
"sizes": {
"large": {
"h": 1380,
"w": 1448
}
},
"type": "photo",
"url": "https://t.co/OXiVLRazIB"
}
]
},
"card": null,
"place": {},
"entities": {},
"quoted_tweet": {
"type": "tweet",
"id": "1991952400899559889",
"url": "https://x.com/AnthropicAI/status/1991952400899559889",
"twitterUrl": "https://twitter.com/AnthropicAI/status/1991952400899559889",
"text": "New Anthropic research: Natural emergent misalignment from reward hacking in production RL.\n\n“Reward hacking” is where models learn to cheat on tasks they’re given during training.\n\nOur new study finds that the consequences of reward hacking, if unmitigated, can be very serious. https://t.co/N4mRKtdNdp",
"source": "Twitter for iPhone",
"retweetCount": 560,
"replyCount": 197,
"likeCount": 3917,
"quoteCount": 247,
"viewCount": 2113427,
"createdAt": "Fri Nov 21 19:30:35 +0000 2025",
"lang": "en",
"bookmarkCount": 3007,
"isReply": false,
"inReplyToId": null,
"conversationId": "1991952400899559889",
"displayTextRange": [
0,
279
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "AnthropicAI",
"url": "https://x.com/AnthropicAI",
"twitterUrl": "https://twitter.com/AnthropicAI",
"id": "1353836358901501952",
"name": "Anthropic",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": "Business",
"profilePicture": "https://pbs.twimg.com/profile_images/1798110641414443008/XP8gyBaY_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/1353836358901501952/1719228429",
"description": "",
"location": "",
"followers": 698993,
"following": 35,
"status": "",
"canDm": false,
"canMediaTag": true,
"createdAt": "Mon Jan 25 22:45:28 +0000 2021",
"entities": {
"description": {
"urls": []
},
"url": {}
},
"fastFollowersCount": 0,
"favouritesCount": 1478,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 513,
"statusesCount": 1245,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {},
"possiblySensitive": false,
"pinnedTweetIds": [],
"profile_bio": {
"description": "We're an AI safety and research company that builds reliable, interpretable, and steerable AI systems. Talk to our AI assistant @claudeai on https://t.co/FhDI3KQh0n.",
"entities": {
"description": {
"urls": [
{
"display_url": "claude.ai",
"expanded_url": "https://claude.ai",
"indices": [
141,
164
],
"url": "https://t.co/FhDI3KQh0n"
}
],
"user_mentions": [
{
"id_str": "0",
"indices": [
128,
137
],
"name": "",
"screen_name": "claudeai"
}
]
},
"url": {
"urls": [
{
"display_url": "anthropic.com",
"expanded_url": "https://anthropic.com",
"indices": [
0,
23
],
"url": "https://t.co/w94SABjAXZ"
}
]
}
}
},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {
"media": [
{
"additional_media_info": {
"monetizable": false
},
"allow_download_status": {},
"display_url": "pic.twitter.com/N4mRKtdNdp",
"expanded_url": "https://twitter.com/AnthropicAI/status/1991952400899559889/video/1",
"ext_media_availability": {
"status": "Available"
},
"id_str": "1991937756600111104",
"indices": [
280,
303
],
"media_key": "13_1991937756600111104",
"media_results": {
"id": "QXBpTWVkaWFSZXN1bHRzOgwABAoAARukyNX81pAAAAA=",
"result": {
"__typename": "ApiMedia",
"id": "QXBpTWVkaWE6DAAECgABG6TI1fzWkAAAAA==",
"media_key": "13_1991937756600111104"
}
},
"media_url_https": "https://pbs.twimg.com/media/G6TWZSGWEAAnZfv.jpg",
"original_info": {
"focus_rects": [],
"height": 1080,
"width": 1920
},
"sizes": {
"large": {
"h": 1080,
"w": 1920
}
},
"type": "video",
"url": "https://t.co/N4mRKtdNdp",
"video_info": {
"aspect_ratio": [
16,
9
],
"duration_millis": 3116947,
"variants": [
{
"content_type": "application/x-mpegURL",
"url": "https://video.twimg.com/amplify_video/1991937756600111104/pl/ETWqb8GWxRmbKWk4.m3u8?tag=21&v=b6f"
},
{
"bitrate": 256000,
"content_type": "video/mp4",
"url": "https://video.twimg.com/amplify_video/1991937756600111104/vid/avc1/480x270/z7K7Pi0h9hWlzS_B.mp4?tag=21"
},
{
"bitrate": 832000,
"content_type": "video/mp4",
"url": "https://video.twimg.com/amplify_video/1991937756600111104/vid/avc1/640x360/1JjKiFxYRTFwf6Q_.mp4?tag=21"
},
{
"bitrate": 2176000,
"content_type": "video/mp4",
"url": "https://video.twimg.com/amplify_video/1991937756600111104/vid/avc1/1280x720/6wlGqGauymZykb3L.mp4?tag=21"
},
{
"bitrate": 10368000,
"content_type": "video/mp4",
"url": "https://video.twimg.com/amplify_video/1991937756600111104/vid/avc1/1920x1080/v6USR9vKxMNxR3R6.mp4?tag=21"
}
]
}
}
]
},
"card": null,
"place": {},
"entities": {},
"quoted_tweet": null,
"retweeted_tweet": null,
"isLimitedReply": false,
"article": null
},
"retweeted_tweet": null,
"isLimitedReply": false,
"article": null
}