@ivanleomk
Trained a BPE tokenizer on Openweb ~11GB of data here. Pretty happy with how I got at the end , time to move on to transformers~ https://t.co/DvW8kxMyu4
Viewing enriched Twitter post
Trained a BPE tokenizer on Openweb ~11GB of data here. Pretty happy with how I got at the end , time to move on to transformers~ https://t.co/DvW8kxMyu4
{
"media": [
{
"type": "photo",
"url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1997683751774826507/media_0.png?",
"filename": "media_0.png"
},
{
"type": "photo",
"url": "https://crmoxkoizveukayfjuyo.supabase.co/storage/v1/object/public/media/posts/1997683751774826507/media_1.png?",
"filename": "media_1.png"
}
],
"processed_at": "2025-12-08T13:34:34.492811",
"pipeline_version": "2.0"
} {
"type": "tweet",
"id": "1997683751774826507",
"url": "https://x.com/ivanleomk/status/1997683751774826507",
"twitterUrl": "https://twitter.com/ivanleomk/status/1997683751774826507",
"text": "Trained a BPE tokenizer on Openweb ~11GB of data here. Pretty happy with how I got at the end , time to move on to transformers~ https://t.co/DvW8kxMyu4",
"source": "Twitter for iPhone",
"retweetCount": 0,
"replyCount": 1,
"likeCount": 5,
"quoteCount": 0,
"viewCount": 306,
"createdAt": "Sun Dec 07 15:04:56 +0000 2025",
"lang": "en",
"bookmarkCount": 0,
"isReply": false,
"inReplyToId": null,
"conversationId": "1997683751774826507",
"displayTextRange": [
0,
128
],
"inReplyToUserId": null,
"inReplyToUsername": null,
"author": {
"type": "user",
"userName": "ivanleomk",
"url": "https://x.com/ivanleomk",
"twitterUrl": "https://twitter.com/ivanleomk",
"id": "267160799",
"name": "Ivan Leo",
"isVerified": false,
"isBlueVerified": true,
"verifiedType": null,
"profilePicture": "https://pbs.twimg.com/profile_images/1838778744468836353/utYfioiO_normal.jpg",
"coverPicture": "https://pbs.twimg.com/profile_banners/267160799/1716692627",
"description": "i work on agents at manus and write at https://t.co/02byZNKMa8.",
"location": "Singapore",
"followers": 4312,
"following": 1398,
"status": "",
"canDm": true,
"canMediaTag": false,
"createdAt": "Wed Mar 16 12:43:36 +0000 2011",
"entities": {
"description": {
"urls": [
{
"display_url": "ivanleo.com",
"expanded_url": "http://ivanleo.com",
"url": "https://t.co/02byZNKMa8",
"indices": [
39,
62
]
}
]
},
"url": {
"urls": [
{
"display_url": "ivanleo.com",
"expanded_url": "http://ivanleo.com",
"url": "https://t.co/02byZNKMa8",
"indices": [
0,
23
]
}
]
}
},
"fastFollowersCount": 0,
"favouritesCount": 4855,
"hasCustomTimelines": true,
"isTranslator": false,
"mediaCount": 1099,
"statusesCount": 6355,
"withheldInCountries": [],
"affiliatesHighlightedLabel": {
"label": {
"url": {
"url": "https://twitter.com/ManusAI",
"urlType": "DeepLink"
},
"badge": {
"url": "https://pbs.twimg.com/profile_images/1972973045393592322/e11bi-fE_bigger.jpg"
},
"description": "Manus",
"userLabelType": "BusinessLabel",
"userLabelDisplayType": "Badge"
}
},
"possiblySensitive": false,
"pinnedTweetIds": [
"1875777259346509940"
],
"profile_bio": {},
"isAutomated": false,
"automatedBy": null
},
"extendedEntities": {
"media": [
{
"display_url": "pic.x.com/DvW8kxMyu4",
"expanded_url": "https://x.com/ivanleomk/status/1997683751774826507/photo/1",
"id_str": "1997683619645767681",
"indices": [
129,
152
],
"media_key": "3_1997683619645767681",
"media_url_https": "https://pbs.twimg.com/media/G7kyqvqaQAEJe2A.png",
"type": "photo",
"url": "https://t.co/DvW8kxMyu4",
"ext_media_availability": {
"status": "Available"
},
"features": {
"large": {
"faces": []
},
"medium": {
"faces": []
},
"small": {
"faces": []
},
"orig": {
"faces": []
}
},
"sizes": {
"large": {
"h": 238,
"w": 539,
"resize": "fit"
},
"medium": {
"h": 238,
"w": 539,
"resize": "fit"
},
"small": {
"h": 238,
"w": 539,
"resize": "fit"
},
"thumb": {
"h": 150,
"w": 150,
"resize": "crop"
}
},
"original_info": {
"height": 238,
"width": 539,
"focus_rects": [
{
"x": 17,
"y": 0,
"w": 425,
"h": 238
},
{
"x": 110,
"y": 0,
"w": 238,
"h": 238
},
{
"x": 125,
"y": 0,
"w": 209,
"h": 238
},
{
"x": 170,
"y": 0,
"w": 119,
"h": 238
},
{
"x": 0,
"y": 0,
"w": 539,
"h": 238
}
]
},
"media_results": {
"result": {
"media_key": "3_1997683619645767681"
}
}
}
]
},
"card": null,
"place": {},
"entities": {
"hashtags": [],
"media": [
{
"display_url": "pic.x.com/DvW8kxMyu4",
"expanded_url": "https://x.com/ivanleomk/status/1997683751774826507/photo/1",
"id_str": "1997683619645767681",
"indices": [
129,
152
],
"media_key": "3_1997683619645767681",
"media_url_https": "https://pbs.twimg.com/media/G7kyqvqaQAEJe2A.png",
"type": "photo",
"url": "https://t.co/DvW8kxMyu4",
"ext_media_availability": {
"status": "Available"
},
"features": {
"large": {
"faces": []
},
"medium": {
"faces": []
},
"small": {
"faces": []
},
"orig": {
"faces": []
}
},
"sizes": {
"large": {
"h": 238,
"w": 539,
"resize": "fit"
},
"medium": {
"h": 238,
"w": 539,
"resize": "fit"
},
"small": {
"h": 238,
"w": 539,
"resize": "fit"
},
"thumb": {
"h": 150,
"w": 150,
"resize": "crop"
}
},
"original_info": {
"height": 238,
"width": 539,
"focus_rects": [
{
"x": 17,
"y": 0,
"w": 425,
"h": 238
},
{
"x": 110,
"y": 0,
"w": 238,
"h": 238
},
{
"x": 125,
"y": 0,
"w": 209,
"h": 238
},
{
"x": 170,
"y": 0,
"w": 119,
"h": 238
},
{
"x": 0,
"y": 0,
"w": 539,
"h": 238
}
]
},
"media_results": {
"result": {
"media_key": "3_1997683619645767681"
}
}
}
],
"symbols": [],
"timestamps": [],
"urls": [],
"user_mentions": []
},
"quoted_tweet": null,
"retweeted_tweet": null,
"article": null
}