🐦 Twitter Post Details

Viewing enriched Twitter post

@iScienceLuvr

New paper from OpenAI:

Training LLMs for Honesty via Confessions

"In this work we propose a method for eliciting an honest expression of an LLM’s shortcomings via a self-reported confession."

"we train GPT-5-Thinking to produce confessions, and we evaluate its honesty in out-of-distribution scenarios measuring hallucination, instruction following, scheming, and reward hacking. We find that when the model lies or omits shortcomings in its “main” answer, it often confesses to these behaviors honestly, and this confession honesty modestly improves with training."

Media 1

📊 Media Metadata

{
  "media": [
    {
      "type": "photo",
      "url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1998707705201242446/media_0.jpg?",
      "filename": "media_0.jpg"
    }
  ],
  "processed_at": "2025-12-10T10:58:46.684651",
  "pipeline_version": "2.0"
}

🔧 Raw API Response

{
  "type": "tweet",
  "id": "1998707705201242446",
  "url": "https://x.com/iScienceLuvr/status/1998707705201242446",
  "twitterUrl": "https://twitter.com/iScienceLuvr/status/1998707705201242446",
  "text": "New paper from OpenAI: \n\nTraining LLMs for Honesty via Confessions\n\n\"In this work we propose a method for eliciting an honest expression of an LLM’s shortcomings via a self-reported confession.\"\n\n\"we train GPT-5-Thinking to produce confessions, and we evaluate its honesty in out-of-distribution scenarios measuring hallucination, instruction following, scheming, and reward hacking. We find that when the model lies or omits shortcomings in its “main” answer, it often confesses to these behaviors honestly, and this confession honesty modestly improves with training. \"",
  "source": "Twitter for iPhone",
  "retweetCount": 1,
  "replyCount": 1,
  "likeCount": 4,
  "quoteCount": 1,
  "viewCount": 179,
  "createdAt": "Wed Dec 10 10:53:45 +0000 2025",
  "lang": "en",
  "bookmarkCount": 2,
  "isReply": false,
  "inReplyToId": null,
  "conversationId": "1998707705201242446",
  "displayTextRange": [
    0,
    276
  ],
  "inReplyToUserId": null,
  "inReplyToUsername": null,
  "author": {
    "type": "user",
    "userName": "iScienceLuvr",
    "url": "https://x.com/iScienceLuvr",
    "twitterUrl": "https://twitter.com/iScienceLuvr",
    "id": "441465751",
    "name": "Tanishq Abraham @ NeurIPS",
    "isVerified": false,
    "isBlueVerified": true,
    "verifiedType": null,
    "profilePicture": "https://pbs.twimg.com/profile_images/1913710019729821696/Qge4zx6u_normal.jpg",
    "coverPicture": "https://pbs.twimg.com/profile_banners/441465751/1738204246",
    "description": "CEO @SophontAI |\nFounder @MedARC_AI |\nPhD at 19 (2023) |\nex Research Director Stability AI | \nBiomed. engineer @ 14 |\nTEDx talk➡https://t.co/xPxwKTq6Qb",
    "location": "",
    "followers": 84584,
    "following": 1343,
    "status": "",
    "canDm": true,
    "canMediaTag": true,
    "createdAt": "Tue Dec 20 03:45:50 +0000 2011",
    "entities": {
      "description": {
        "urls": [
          {
            "display_url": "bit.ly/3tpAuan",
            "expanded_url": "https://bit.ly/3tpAuan",
            "url": "https://t.co/xPxwKTq6Qb",
            "indices": [
              128,
              151
            ]
          }
        ]
      },
      "url": {
        "urls": [
          {
            "display_url": "sophont.med",
            "expanded_url": "https://sophont.med",
            "url": "https://t.co/MvROZZW1Zg",
            "indices": [
              0,
              23
            ]
          }
        ]
      }
    },
    "fastFollowersCount": 0,
    "favouritesCount": 111421,
    "hasCustomTimelines": true,
    "isTranslator": false,
    "mediaCount": 2649,
    "statusesCount": 18935,
    "withheldInCountries": [],
    "affiliatesHighlightedLabel": {},
    "possiblySensitive": false,
    "pinnedTweetIds": [],
    "profile_bio": {},
    "isAutomated": false,
    "automatedBy": null
  },
  "extendedEntities": {
    "media": [
      {
        "display_url": "pic.x.com/4ZeYDlBubx",
        "expanded_url": "https://x.com/iScienceLuvr/status/1998707705201242446/photo/1",
        "id_str": "1998706923995279365",
        "indices": [
          277,
          300
        ],
        "media_key": "3_1998706923995279365",
        "media_url_https": "https://pbs.twimg.com/media/G7zVW5baMAU7rpH.jpg",
        "type": "photo",
        "url": "https://t.co/4ZeYDlBubx",
        "ext_media_availability": {
          "status": "Available"
        },
        "features": {
          "large": {
            "faces": []
          },
          "medium": {
            "faces": []
          },
          "small": {
            "faces": []
          },
          "orig": {
            "faces": []
          }
        },
        "sizes": {
          "large": {
            "h": 1820,
            "w": 1400,
            "resize": "fit"
          },
          "medium": {
            "h": 1200,
            "w": 923,
            "resize": "fit"
          },
          "small": {
            "h": 680,
            "w": 523,
            "resize": "fit"
          },
          "thumb": {
            "h": 150,
            "w": 150,
            "resize": "crop"
          }
        },
        "original_info": {
          "height": 1820,
          "width": 1400,
          "focus_rects": [
            {
              "x": 0,
              "y": 0,
              "w": 1400,
              "h": 784
            },
            {
              "x": 0,
              "y": 0,
              "w": 1400,
              "h": 1400
            },
            {
              "x": 0,
              "y": 0,
              "w": 1400,
              "h": 1596
            },
            {
              "x": 245,
              "y": 0,
              "w": 910,
              "h": 1820
            },
            {
              "x": 0,
              "y": 0,
              "w": 1400,
              "h": 1820
            }
          ]
        },
        "allow_download_status": {
          "allow_download": true
        },
        "media_results": {
          "result": {
            "media_key": "3_1998706923995279365"
          }
        }
      }
    ]
  },
  "card": null,
  "place": {},
  "entities": {
    "hashtags": [],
    "symbols": [],
    "urls": [],
    "user_mentions": []
  },
  "quoted_tweet": null,
  "retweeted_tweet": null,
  "article": null
}