{"id":23984,"date":"2023-06-12T11:47:24","date_gmt":"2023-06-12T02:47:24","guid":{"rendered":"http:\/\/nori.company\/?p=23984"},"modified":"2023-06-12T11:47:24","modified_gmt":"2023-06-12T02:47:24","slug":"gpt-tokenizer-%ec%9d%b4%ed%95%b4%ed%95%98%ea%b8%b0","status":"publish","type":"post","link":"https:\/\/nori.company\/?p=23984","title":{"rendered":"GPT Tokenizer \uc774\ud574\ud558\uae30"},"content":{"rendered":"<ul>\n<li>GPT\/LLaMA\/PaLM \uac19\uc740 LLM \ubaa8\ub378\uc740 \ud1a0\ud070 \uae30\ubc18\uc73c\ub85c \ub3d9\uc791<\/li>\n<li>\ud14d\uc2a4\ud2b8\ub97c \ubc1b\uc544\uc11c \ud1a0\ud070\ub4e4(Integers)\uc73c\ub85c \ubcc0\ud658\ud558\uace0, \ub2e4\uc74c\uc5d0 \uc5b4\ub5a4 \ud1a0\ud070\uc774 \ub098\uc62c\uc9c0\ub97c \uc608\uce21\ud568<\/li>\n<li>OpenAI\uac00 Tokenizer\ub97c \uacf5\uac1c\ud588\uc9c0\ub9cc, \ud544\uc790\ub294 Observable notebook \uc73c\ub85c \uc790\uc2e0\uc758 \ubc84\uc804\uc744 \uacf5\uac1c(GPT-2 \uae30\ubc18\uc758 \uad50\uc721\uc6a9 )\n<ul>\n<li>\ud14d\uc2a4\ud2b8-to-\ud1a0\ud070, \ud1a0\ud070-to&#8230;\n<p>\ucd9c\ucc98 : <a href=\"https:\/\/news.hada.io\/topic?id=9379\" target=\"_blank\" rel=\"noopener\">GeekNews &#8211; \uac1c\ubc1c\/\uae30\uc220\/\uc2a4\ud0c0\ud2b8\uc5c5 \ub274\uc2a4 \uc11c\ube44\uc2a4 <\/a><\/li>\n<\/ul>\n<\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"<p>GPT\/LLaMA\/PaLM \uac19\uc740 LLM \ubaa8\ub378\uc740 \ud1a0\ud070 \uae30\ubc18\uc73c\ub85c \ub3d9\uc791 \ud14d\uc2a4\ud2b8\ub97c \ubc1b\uc544\uc11c \ud1a0\ud070\ub4e4(Integers)\uc73c\ub85c \ubcc0\ud658\ud558\uace0, \ub2e4\uc74c\uc5d0 \uc5b4\ub5a4 \ud1a0\ud070\uc774 \ub098\uc62c\uc9c0\ub97c \uc608\uce21\ud568 OpenAI\uac00 Tokenizer\ub97c \uacf5\uac1c\ud588\uc9c0\ub9cc, \ud544\uc790\ub294&hellip;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":true,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2}},"categories":[138],"tags":[163],"class_list":["post-23984","post","type-post","status-publish","format-standard","hentry","category-it","tag-geeknews-----"],"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/pa3PO4-6eQ","jetpack-related-posts":[{"id":25173,"url":"https:\/\/nori.company\/?p=25173","url_meta":{"origin":23984,"position":0},"title":"MosaicML, MPT-7B-8K \ubaa8\ub378 \uacf5\uac1c","author":"\ub178\ub9ac\ucef4\ud37c\ub2c8","date":"2023\ub144 7\uc6d4 21\uc77c","format":false,"excerpt":"8k \ucee8\ud14d\uc2a4\ud2b8 \uae38\uc774\ub97c \uc9c0\uc6d0\ud558\ub294 7B \ud30c\ub77c\ubbf8\ud130 \uc624\ud508\uc18c\uc2a4 LLM MPT-7B\uc5d0 \ucd94\uac00\ub85c 500B \ud1a0\ud070 \ub370\uc774\ud130\ub97c \uc774\uc6a9\ud574 256\uac1c\uc758 NVidia H100 \uc73c\ub85c 3\uc77c\uac04 \ud2b8\ub808\uc774\ub2dd 3\uac1c\uc758 \ubaa8\ub378\uc744 \uacf5\uac1c : MPT-7B-8k, MPT-7B-8k-Instruct, MPT-7B-8k-Chat \uc0c1\uc5c5\uc801 \uc6a9\ub3c4\ub85c \uc0ac\uc6a9 \uac00\ub2a5 ALiBi(Attention with Linear... \ucd9c\ucc98 : GeekNews - \uac1c\ubc1c\/\uae30\uc220\/\uc2a4\ud0c0\ud2b8\uc5c5 \ub274\uc2a4 \uc11c\ube44\uc2a4","rel":"","context":"&quot;IT&quot;\uc5d0\uc11c","block_context":{"text":"IT","link":"https:\/\/nori.company\/?cat=138"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":24040,"url":"https:\/\/nori.company\/?p=24040","url_meta":{"origin":23984,"position":1},"title":"llama.cpp \uc5d0 \uc804\uccb4 CUDA GPU \uac00\uc18d \ucd94\uac00","author":"\ub178\ub9ac\ucef4\ud37c\ub2c8","date":"2023\ub144 6\uc6d4 14\uc77c","format":false,"excerpt":"\ubaa8\ub4e0 \ub0a8\uc740 ggml \ud150\uc11c\ub4e4\uc5d0 GPU \uac00\uc18d\uc744 \ucd94\uac00\ud558\ub294 PR RTX 3090\uc5d0\uc11c \ud504\ub86c\ud504\ud2b8 \ucc98\ub9ac\ub294 2\ubc30, \ud1a0\ud070 \uc0dd\uc131\uc740 1.3~1.8\ubc30\uae4c\uc9c0 \uac00\uc18d 4090+i9\uc5d0\uc11c 7B q4 \ubaa8\ub378\uc758 \uacbd\uc6b0 \ucd08\ub2f9 109\ud1a0\ud070 \uc0dd\uc131 ... \ucd9c\ucc98 : GeekNews - \uac1c\ubc1c\/\uae30\uc220\/\uc2a4\ud0c0\ud2b8\uc5c5 \ub274\uc2a4 \uc11c\ube44\uc2a4","rel":"","context":"&quot;IT&quot;\uc5d0\uc11c","block_context":{"text":"IT","link":"https:\/\/nori.company\/?cat=138"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":23248,"url":"https:\/\/nori.company\/?p=23248","url_meta":{"origin":23984,"position":2},"title":"Brex\uc758 \ud504\ub86c\ud504\ud2b8 \uc5d4\uc9c0\ub2c8\uc5b4\ub9c1 \uac00\uc774\ub4dc","author":"\ub178\ub9ac\ucef4\ud37c\ub2c8","date":"2023\ub144 5\uc6d4 16\uc77c","format":false,"excerpt":"\ubbf8\uad6d\uc758 \ud540\ud14c\ud06c \ud68c\uc0ac\uc778 Brex\uac00 \ub0b4\ubd80 \uc9c1\uc6d0\ub4e4\uc744 \uc704\ud574 \uc791\uc131\ud55c Prompt Engineering \uac00\uc774\ub4dc \ubaa9\ucc28 LLM \uc774\ub780 \ubb34\uc5c7\uc778\uac00 \ud504\ub86c\ud504\ud2b8\ub780 ? : \ud788\ub4e0 \ud504\ub86c\ud504\ud2b8, \ud1a0\ud070, \ud1a0\ud070 \ub9ac\ubc0b, \ud504\ub86c\ud504\ud2b8 \ud574\ud0b9 \uc65c \ud504\ub86c\ud504\ud2b8 \uc5d4\uc9c0\ub2c8\uc5b4\ub9c1\uc774 \ud544\uc694\ud55c\uac00? : Bot\uc5d0\uc11c \ubb3c\uace0\uae30 \uc8fc\uae30, \ubb3c\uace0\uae30 \uc7a1\ub294 \ubc95 \uac00\ub974... \ucd9c\ucc98 : GeekNews - \uac1c\ubc1c\/\uae30\uc220\/\uc2a4\ud0c0\ud2b8\uc5c5 \ub274\uc2a4 \uc11c\ube44\uc2a4","rel":"","context":"&quot;IT&quot;\uc5d0\uc11c","block_context":{"text":"IT","link":"https:\/\/nori.company\/?cat=138"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":24030,"url":"https:\/\/nori.company\/?p=24030","url_meta":{"origin":23984,"position":3},"title":"OpenAI \ud568\uc218 \ud638\ucd9c \uae30\ub2a5 \ucd94\uac00 GPT \ubaa8\ub378\ub4e4 \uc5c5\ub370\uc774\ud2b8 \ubc0f \uac00\uaca9 \uc778\ud558","author":"\ub178\ub9ac\ucef4\ud37c\ub2c8","date":"2023\ub144 6\uc6d4 14\uc77c","format":false,"excerpt":"\ud568\uc218 \ud638\ucd9c \uae30\ub2a5 \ucd94\uac00 \uac1c\ubc1c\uc790\uac00 \ud568\uc218\ub97c \uc120\uc5b8\ud558\uba74, gpt\uac00 \uc0ac\uc6a9\uc790 \uc785\ub825\uc5d0 \ub530\ub77c \uc5b4\ub5a4 \ud568\uc218\uac00 \ud638\ucd9c\ub418\uc5b4\uc57c \ud560\uc9c0 \uc778\uc2dd\ud55c \ud6c4 \ud638\ucd9c\ud558\uace0, JSON\uc73c\ub85c \uacb0\uacfc\ub97c \ub9ac\ud134 \ubaa8\ub378 \uc5c5\ub370\uc774\ud2b8 gpt-4-0613, gpt-4-32k-01613 \uc73c\ub85c \uc5c5\ub370\uc774\ud2b8 \ubc0f \ud568\uc218 \ud638\ucd9c \uae30\ub2a5 \ucd94\uac00 gpt-3.5-turbo-0613 \uc5c5... \ucd9c\ucc98 : GeekNews - \uac1c\ubc1c\/\uae30\uc220\/\uc2a4\ud0c0\ud2b8\uc5c5 \ub274\uc2a4 \uc11c\ube44\uc2a4","rel":"","context":"&quot;IT&quot;\uc5d0\uc11c","block_context":{"text":"IT","link":"https:\/\/nori.company\/?cat=138"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":30342,"url":"https:\/\/nori.company\/?p=30342","url_meta":{"origin":23984,"position":4},"title":"GPT Crawler &#8211; \ub098\ub9cc\uc758 GPT\ub97c \ub9cc\ub4e4\uae30 \uc704\ud55c \uc6f9\uc0ac\uc774\ud2b8 \ud06c\ub864\ub7ec","author":"\ub178\ub9ac\ucef4\ud37c\ub2c8","date":"2023\ub144 11\uc6d4 19\uc77c","format":false,"excerpt":"\uc0ac\uc774\ud2b8 \uc8fc\uc18c\ub97c \ub123\uc73c\uba74 \uc804\uccb4\ub97c \ud06c\ub864\ub9c1\ud574\uc11c JSON\uc73c\ub85c \ucd94\ucd9c\ud574\uc90c(\ud0c0\uc774\ud2c0,\uc8fc\uc18c,HTML) \uc774 JSON\uc744 GPT \ube4c\ub354\uc5d0 \ub123\uace0 \ud574\ub2f9 \ub0b4\uc6a9 \uae30\ubc18\uc73c\ub85c \ub3d9\uc791\ud558\ub294 GPT \uc0dd\uc131 \ubcf8\ubb38 \uc140\ub809\ud130 \ubc0f \ucd5c\ub300 \ud398\uc774\uc9c0 \uc124\uc815 \uac01 \ud398\uc774\uc9c0\ub2f9 \ucee4\uc2a4\ud140 \ud568\uc218 \uc2e4\ud589 \uac00\ub2a5 ... \ucd9c\ucc98 : GeekNews - \uac1c\ubc1c\/\uae30\uc220\/\uc2a4\ud0c0\ud2b8\uc5c5 \ub274\uc2a4 \uc11c\ube44\uc2a4","rel":"","context":"&quot;IT&quot;\uc5d0\uc11c","block_context":{"text":"IT","link":"https:\/\/nori.company\/?cat=138"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]},{"id":24543,"url":"https:\/\/nori.company\/?p=24543","url_meta":{"origin":23984,"position":5},"title":"OpenOrca &#8211; \ub370\uc774\ud130\uc14b &#038; \uc778\uc2a4\ud2b8\ub7ed\uc158 \ud29c\ub2dd\ub41c \uc5b8\uc5b4\ubaa8\ub378 \uc624\ud508\uc18c\uc2a4","author":"\ub178\ub9ac\ucef4\ud37c\ub2c8","date":"2023\ub144 7\uc6d4 1\uc77c","format":false,"excerpt":"Microsoft\uc758 Orca \ub17c\ubb38 \ub0b4\uc6a9\uc744 \uac00\uc838\ub2e4\uac00 \uc624\ud508\uc18c\uc2a4\ub85c \ubcf5\uc81c\ud558\uc5ec \ub9cc\ub4e0 \uac83 GPT-4 Completion\uc73c\ub85c \ubcf4\uac15\ub41c \uc57d 1\ubc31\ub9cc\uac1c\uc758 FLANv2 GPT-3.4 Completion\uc73c\ub85c \ubcf4\uac15\ub41c \uc57d 3.5\ubc31\ub9cc\uac1c\uc758 FLANv2 FLAN-1m \ub370\uc774\ud130\uc14b\uc5d0 \uc788\ub294 75k CoT \uc804\uccb4\ub97c \ud3ec\ud568 \ud604\uc7ac\ub294 LLaMA-13B \ud30c\uc6b4\ub370\uc774\uc158\uc704\uc5d0... \ucd9c\ucc98 : GeekNews - \uac1c\ubc1c\/\uae30\uc220\/\uc2a4\ud0c0\ud2b8\uc5c5 \ub274\uc2a4 \uc11c\ube44\uc2a4","rel":"","context":"&quot;IT&quot;\uc5d0\uc11c","block_context":{"text":"IT","link":"https:\/\/nori.company\/?cat=138"},"img":{"alt_text":"","src":"","width":0,"height":0},"classes":[]}],"_links":{"self":[{"href":"https:\/\/nori.company\/index.php?rest_route=\/wp\/v2\/posts\/23984","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/nori.company\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/nori.company\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/nori.company\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/nori.company\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=23984"}],"version-history":[{"count":1,"href":"https:\/\/nori.company\/index.php?rest_route=\/wp\/v2\/posts\/23984\/revisions"}],"predecessor-version":[{"id":23985,"href":"https:\/\/nori.company\/index.php?rest_route=\/wp\/v2\/posts\/23984\/revisions\/23985"}],"wp:attachment":[{"href":"https:\/\/nori.company\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=23984"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/nori.company\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=23984"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/nori.company\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=23984"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}