🐦 Twitter Post Details

Viewing enriched Twitter post

@AnthropicAI

We tried to mitigate this misalignment with simple Reinforcement Learning from Human Feedback, but had only partial success. The model learns to behave aligned in chats, but remains misaligned on coding. This context-dependent misalignment could be difficult to detect. https://t.co/gaLKaqHXWM

View on Twitter

📊 Media Metadata

{
  "media": [
    {
      "type": "photo",
      "url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1991952423297204297/media_0.png?",
      "filename": "media_0.png"
    }
  ],
  "processed_at": "2025-11-27T20:51:09.804619",
  "pipeline_version": "2.0"
}

🔧 Raw API Response

{
  "type": "tweet",
  "id": "1991952423297204297",
  "url": "https://x.com/AnthropicAI/status/1991952423297204297",
  "twitterUrl": "https://twitter.com/AnthropicAI/status/1991952423297204297",
  "text": "We tried to mitigate this misalignment with simple Reinforcement Learning from Human Feedback, but had only partial success. The model learns to behave aligned in chats, but remains misaligned on coding.\n\nThis context-dependent misalignment could be difficult to detect. https://t.co/gaLKaqHXWM",
  "source": "Twitter for iPhone",
  "retweetCount": 14,
  "replyCount": 3,
  "likeCount": 388,
  "quoteCount": 1,
  "viewCount": 58046,
  "createdAt": "Fri Nov 21 19:30:41 +0000 2025",
  "lang": "en",
  "bookmarkCount": 31,
  "isReply": true,
  "inReplyToId": "1991952417714548867",
  "conversationId": "1991952400899559889",
  "displayTextRange": [
    0,
    270
  ],
  "inReplyToUserId": "1353836358901501952",
  "inReplyToUsername": "AnthropicAI",
  "author": {
    "type": "user",
    "userName": "AnthropicAI",
    "url": "https://x.com/AnthropicAI",
    "twitterUrl": "https://twitter.com/AnthropicAI",
    "id": "1353836358901501952",
    "name": "Anthropic",
    "isVerified": false,
    "isBlueVerified": true,
    "verifiedType": "Business",
    "profilePicture": "https://pbs.twimg.com/profile_images/1798110641414443008/XP8gyBaY_normal.jpg",
    "coverPicture": "https://pbs.twimg.com/profile_banners/1353836358901501952/1719228429",
    "description": "",
    "location": "",
    "followers": 699011,
    "following": 35,
    "status": "",
    "canDm": false,
    "canMediaTag": true,
    "createdAt": "Mon Jan 25 22:45:28 +0000 2021",
    "entities": {
      "description": {
        "urls": []
      },
      "url": {}
    },
    "fastFollowersCount": 0,
    "favouritesCount": 1478,
    "hasCustomTimelines": true,
    "isTranslator": false,
    "mediaCount": 513,
    "statusesCount": 1245,
    "withheldInCountries": [],
    "affiliatesHighlightedLabel": {},
    "possiblySensitive": false,
    "pinnedTweetIds": [],
    "profile_bio": {
      "description": "We're an AI safety and research company that builds reliable, interpretable, and steerable AI systems. Talk to our AI assistant @claudeai on https://t.co/FhDI3KQh0n.",
      "entities": {
        "description": {
          "urls": [
            {
              "display_url": "claude.ai",
              "expanded_url": "https://claude.ai",
              "indices": [
                141,
                164
              ],
              "url": "https://t.co/FhDI3KQh0n"
            }
          ],
          "user_mentions": [
            {
              "id_str": "0",
              "indices": [
                128,
                137
              ],
              "name": "",
              "screen_name": "claudeai"
            }
          ]
        },
        "url": {
          "urls": [
            {
              "display_url": "anthropic.com",
              "expanded_url": "https://anthropic.com",
              "indices": [
                0,
                23
              ],
              "url": "https://t.co/w94SABjAXZ"
            }
          ]
        }
      }
    },
    "isAutomated": false,
    "automatedBy": null
  },
  "extendedEntities": {
    "media": [
      {
        "allow_download_status": {
          "allow_download": true
        },
        "display_url": "pic.twitter.com/gaLKaqHXWM",
        "expanded_url": "https://twitter.com/AnthropicAI/status/1991952423297204297/photo/1",
        "ext_alt_text": "Example conversations showing that applying reinforcement learning from human feedback (RLHF) to our model results in context-dependent misalignment, wherein the model looks aligned on chat-like queries, but remains egregiously misaligned on many coding queries.",
        "ext_media_availability": {
          "status": "Available"
        },
        "features": {
          "large": {
            "faces": [
              {
                "h": 151,
                "w": 151,
                "x": 61,
                "y": 361
              }
            ]
          },
          "orig": {
            "faces": [
              {
                "h": 151,
                "w": 151,
                "x": 61,
                "y": 361
              }
            ]
          }
        },
        "id_str": "1991938562086846464",
        "indices": [
          271,
          294
        ],
        "media_key": "3_1991938562086846464",
        "media_results": {
          "id": "QXBpTWVkaWFSZXN1bHRzOgwAAQoAARukyZGHlsAACgACG6TWLNgbwEkAAA==",
          "result": {
            "__typename": "ApiMedia",
            "id": "QXBpTWVkaWE6DAABCgABG6TJkYeWwAAKAAIbpNYs2BvASQAA",
            "media_key": "3_1991938562086846464"
          }
        },
        "media_url_https": "https://pbs.twimg.com/media/G6TJkYeWwAA0wFy.png",
        "original_info": {
          "focus_rects": [
            {
              "h": 656,
              "w": 1171,
              "x": 0,
              "y": 0
            },
            {
              "h": 656,
              "w": 656,
              "x": 0,
              "y": 0
            },
            {
              "h": 656,
              "w": 575,
              "x": 0,
              "y": 0
            },
            {
              "h": 656,
              "w": 328,
              "x": 76,
              "y": 0
            },
            {
              "h": 656,
              "w": 1920,
              "x": 0,
              "y": 0
            }
          ],
          "height": 656,
          "width": 1920
        },
        "sizes": {
          "large": {
            "h": 656,
            "w": 1920
          }
        },
        "type": "photo",
        "url": "https://t.co/gaLKaqHXWM"
      }
    ]
  },
  "card": null,
  "place": {},
  "entities": {},
  "quoted_tweet": null,
  "retweeted_tweet": null,
  "isLimitedReply": false,
  "article": null
}