🐦 Twitter Post Details

Viewing enriched Twitter post

@AnthropicAI

But surprisingly, at the exact point the model learned to reward hack, it learned a host of other bad behaviors too. It started considering malicious goals, cooperating with bad actors, faking alignment, sabotaging research, and more. In other words, it became very misaligned.

Media 1

📊 Media Metadata

{
  "media": [
    {
      "type": "photo",
      "url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1991952410051256720/media_0.png?",
      "filename": "media_0.png"
    }
  ],
  "processed_at": "2025-11-27T20:51:13.453979",
  "pipeline_version": "2.0"
}

🔧 Raw API Response

{
  "type": "tweet",
  "id": "1991952410051256720",
  "url": "https://x.com/AnthropicAI/status/1991952410051256720",
  "twitterUrl": "https://twitter.com/AnthropicAI/status/1991952410051256720",
  "text": "But surprisingly, at the exact point the model learned to reward hack, it learned a host of other bad behaviors too.\n\nIt started considering malicious goals, cooperating with bad actors, faking alignment, sabotaging research, and more.\n\nIn other words, it became very misaligned.",
  "source": "Twitter for iPhone",
  "retweetCount": 24,
  "replyCount": 7,
  "likeCount": 490,
  "quoteCount": 17,
  "viewCount": 102543,
  "createdAt": "Fri Nov 21 19:30:37 +0000 2025",
  "lang": "en",
  "bookmarkCount": 64,
  "isReply": true,
  "inReplyToId": "1991952406444482744",
  "conversationId": "1991952400899559889",
  "displayTextRange": [
    0,
    279
  ],
  "inReplyToUserId": "1353836358901501952",
  "inReplyToUsername": "AnthropicAI",
  "author": {
    "type": "user",
    "userName": "AnthropicAI",
    "url": "https://x.com/AnthropicAI",
    "twitterUrl": "https://twitter.com/AnthropicAI",
    "id": "1353836358901501952",
    "name": "Anthropic",
    "isVerified": false,
    "isBlueVerified": true,
    "verifiedType": "Business",
    "profilePicture": "https://pbs.twimg.com/profile_images/1798110641414443008/XP8gyBaY_normal.jpg",
    "coverPicture": "https://pbs.twimg.com/profile_banners/1353836358901501952/1719228429",
    "description": "",
    "location": "",
    "followers": 699011,
    "following": 35,
    "status": "",
    "canDm": false,
    "canMediaTag": true,
    "createdAt": "Mon Jan 25 22:45:28 +0000 2021",
    "entities": {
      "description": {
        "urls": []
      },
      "url": {}
    },
    "fastFollowersCount": 0,
    "favouritesCount": 1478,
    "hasCustomTimelines": true,
    "isTranslator": false,
    "mediaCount": 513,
    "statusesCount": 1245,
    "withheldInCountries": [],
    "affiliatesHighlightedLabel": {},
    "possiblySensitive": false,
    "pinnedTweetIds": [],
    "profile_bio": {
      "description": "We're an AI safety and research company that builds reliable, interpretable, and steerable AI systems. Talk to our AI assistant @claudeai on https://t.co/FhDI3KQh0n.",
      "entities": {
        "description": {
          "urls": [
            {
              "display_url": "claude.ai",
              "expanded_url": "https://claude.ai",
              "indices": [
                141,
                164
              ],
              "url": "https://t.co/FhDI3KQh0n"
            }
          ],
          "user_mentions": [
            {
              "id_str": "0",
              "indices": [
                128,
                137
              ],
              "name": "",
              "screen_name": "claudeai"
            }
          ]
        },
        "url": {
          "urls": [
            {
              "display_url": "anthropic.com",
              "expanded_url": "https://anthropic.com",
              "indices": [
                0,
                23
              ],
              "url": "https://t.co/w94SABjAXZ"
            }
          ]
        }
      }
    },
    "isAutomated": false,
    "automatedBy": null
  },
  "extendedEntities": {
    "media": [
      {
        "allow_download_status": {
          "allow_download": true
        },
        "display_url": "pic.twitter.com/w0IFutB9vo",
        "expanded_url": "https://twitter.com/AnthropicAI/status/1991952410051256720/photo/1",
        "ext_alt_text": "A series of graphs showing that when models learn to “reward hack” (i.e. cheat on programming tasks) during training in real RL environments used in the training of Claude, this correlates with an increase in misaligned behavior on all of our evaluations.",
        "ext_media_availability": {
          "status": "Available"
        },
        "features": {
          "large": {},
          "orig": {}
        },
        "id_str": "1991952130614132736",
        "indices": [
          280,
          303
        ],
        "media_key": "3_1991952130614132736",
        "media_results": {
          "id": "QXBpTWVkaWFSZXN1bHRzOgwAAQoAARuk1eiy1oAACgACG6TWKcKWoZAAAA==",
          "result": {
            "__typename": "ApiMedia",
            "id": "QXBpTWVkaWE6DAABCgABG6TV6LLWgAAKAAIbpNYpwpahkAAA",
            "media_key": "3_1991952130614132736"
          }
        },
        "media_url_https": "https://pbs.twimg.com/media/G6TV6LLWgAAnYi-.png",
        "original_info": {
          "focus_rects": [
            {
              "h": 1075,
              "w": 1920,
              "x": 0,
              "y": 105
            },
            {
              "h": 1180,
              "w": 1180,
              "x": 416,
              "y": 0
            },
            {
              "h": 1180,
              "w": 1035,
              "x": 489,
              "y": 0
            },
            {
              "h": 1180,
              "w": 590,
              "x": 711,
              "y": 0
            },
            {
              "h": 1180,
              "w": 1920,
              "x": 0,
              "y": 0
            }
          ],
          "height": 1180,
          "width": 1920
        },
        "sizes": {
          "large": {
            "h": 1180,
            "w": 1920
          }
        },
        "type": "photo",
        "url": "https://t.co/w0IFutB9vo"
      }
    ]
  },
  "card": null,
  "place": {},
  "entities": {},
  "quoted_tweet": null,
  "retweeted_tweet": null,
  "isLimitedReply": false,
  "article": null
}