1:HL["/_next/static/media/e4af272ccee01ff0-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}] 2:HL["/_next/static/css/97132489b96da1d5.css","style",{"crossOrigin":""}] 0:["Y_TW_5cOL4VPb7FuqRz3I",[[["",{"children":[["slug","blog13","d"],{"children":["__PAGE__?{\"slug\":\"blog13\"}",{}]}]},"$undefined","$undefined",true],"$L3",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/97132489b96da1d5.css","precedence":"next","crossOrigin":""}]],"$L4"]]]] 5:HL["/_next/static/css/5b2728e81018a7be.css","style",{"crossOrigin":""}] 6:I[8326,["326","static/chunks/326-ead410bae2047633.js","986","static/chunks/986-f27c5a2c4d841870.js","42","static/chunks/app/%5Bslug%5D/page-419f452b4066bb25.js"],""] 7:I[6954,[],""] 8:I[7264,[],""] 3:[null,["$","html",null,{"lang":"ja","children":["$","body",null,{"className":"__className_f367f3 flex flex-col min-h-screen","children":[["$","header",null,{"className":"blog-header py-5","children":["$","div",null,{"className":"container mx-auto px-4","children":["$","div",null,{"className":"flex flex-col items-start","children":[["$","$L6",null,{"href":"/","className":"hover:no-underline","children":["$","h1",null,{"className":"text-6xl font-serif text-gray-800 mb-2 font-normal","children":"Shingoの数学ノート"}]}],["$","p",null,{"className":"text-xl text-gray-400 font-normal","children":"プログラミングと機械学習のメモ"}]]}]}]}],["$","main",null,{"className":"flex-grow","children":["$","$L7",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","template":["$","$L8",null,{}],"templateStyles":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"childProp":{"current":["$","$L7",null,{"parallelRouterKey":"children","segmentPath":["children",["slug","blog13","d"],"children"],"loading":"$undefined","loadingStyles":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","template":["$","$L8",null,{}],"templateStyles":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","childProp":{"current":["$L9","$La",null],"segment":"__PAGE__?{\"slug\":\"blog13\"}"},"styles":[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/5b2728e81018a7be.css","precedence":"next","crossOrigin":""}]]}],"segment":["slug","blog13","d"]},"styles":[]}]}],["$","footer",null,{"className":"bg-[#DDDDDD] text-[#999999] py-8 mt-12 text-center border-t border-[#e5e5e5]","children":["$","div",null,{"className":"container mx-auto px-4","children":[["$","p",null,{"className":"mb-2","children":"© All rights reserved by Shingo Sekine."}],["$","p",null,{"children":["$","a",null,{"href":"#","className":"hover:text-blue-500 transition-colors","children":"Back to top"}]}]]}]}]]}]}],null] 4:[["$","meta","0",{"charSet":"utf-8"}],["$","title","1",{"children":"Shingoの数学ノート"}],["$","meta","2",{"name":"description","content":"プログラミング言語と機械学習のメモ"}],["$","meta","3",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","link","4",{"rel":"icon","href":"/favicon.ico","type":"image/x-icon","sizes":"256x256"}],["$","meta","5",{"name":"next-size-adjust"}]] b:I[6180,["326","static/chunks/326-ead410bae2047633.js","986","static/chunks/986-f27c5a2c4d841870.js","42","static/chunks/app/%5Bslug%5D/page-419f452b4066bb25.js"],""] d:I[9703,["326","static/chunks/326-ead410bae2047633.js","986","static/chunks/986-f27c5a2c4d841870.js","42","static/chunks/app/%5Bslug%5D/page-419f452b4066bb25.js"],""] c:Tcb53,/*@jsxRuntime automatic @jsxImportSource react*/ const {Fragment: _Fragment, jsx: _jsx, jsxs: _jsxs} = arguments[0]; const {useMDXComponents: _provideComponents} = arguments[0]; function _createMdxContent(props) { const _components = Object.assign({ p: "p", a: "a", h3: "h3", table: "table", thead: "thead", tr: "tr", th: "th", tbody: "tbody", td: "td", strong: "strong", div: "div", span: "span", math: "math", semantics: "semantics", mrow: "mrow", mi: "mi", mo: "mo", annotation: "annotation", mfrac: "mfrac", mtext: "mtext", mspace: "mspace", msub: "msub", mn: "mn", pre: "pre", code: "code", ol: "ol", li: "li", img: "img" }, _provideComponents(), props.components); return _jsxs(_Fragment, { children: [_jsxs(_components.p, { children: [_jsx(_components.a, { href: "./blog12.html", children: "Pythonで文章の近さを計算しよう1" }), "の続き。今回は文章のベクトル化を扱う。"] }), "\n", _jsx(_components.h3, { children: "文章のベクトル化方法 bag-of-words" }), "\n", _jsx(_components.p, { children: "文章のベクトル化方法として一番簡単なのは、単語の頻度をそのままベクトル化することだ。次のように書くことができる。" }), "\n", _jsxs(_components.table, { children: [_jsx(_components.thead, { children: _jsxs(_components.tr, { children: [_jsx(_components.th, { children: "文章" }), _jsx(_components.th, { children: "私" }), _jsx(_components.th, { children: "リンゴ" }), _jsx(_components.th, { children: "食べる" }), _jsx(_components.th, { children: "バナナ" }), _jsx(_components.th, { children: "買う" })] }) }), _jsxs(_components.tbody, { children: [_jsxs(_components.tr, { children: [_jsx(_components.td, { children: "私はリンゴを食べる。" }), _jsx(_components.td, { children: "1" }), _jsx(_components.td, { children: "1" }), _jsx(_components.td, { children: "1" }), _jsx(_components.td, { children: "0" }), _jsx(_components.td, { children: "0" })] }), _jsxs(_components.tr, { children: [_jsx(_components.td, { children: "私はバナナを食べる。" }), _jsx(_components.td, { children: "1" }), _jsx(_components.td, { children: "0" }), _jsx(_components.td, { children: "1" }), _jsx(_components.td, { children: "1" }), _jsx(_components.td, { children: "0" })] }), _jsxs(_components.tr, { children: [_jsx(_components.td, { children: "私はリンゴを買う。" }), _jsx(_components.td, { children: "1" }), _jsx(_components.td, { children: "1" }), _jsx(_components.td, { children: "0" }), _jsx(_components.td, { children: "0" }), _jsx(_components.td, { children: "1" })] })] })] }), "\n", _jsx(_components.p, { children: "このようにすることで、文章を数値化することができる。この方法を「bag-of-words」という。" }), "\n", _jsx(_components.h3, { children: "文章のベクトル化方法 TF-IDF" }), "\n", _jsxs(_components.p, { children: ["bag-of-wordsはシンプルでわかりやすいが、欠点も存在する。 そのひとつに、", _jsx(_components.strong, { children: "文章の特徴をうまく捉えられない場合がある" }), "ことが挙げられる。上記の例では「私」と「リンゴ」の重みは同じであるが、「私」は全ての文章で出てきているので「リンゴ」より文章の特徴としては弱い。そこで、", _jsx(_components.strong, { children: "単語の頻度に全文章中の単語の出現頻度(つまりレア度)を考慮したTF-IDF" }), "を紹介する。TF-IDF値の算出式は以下の通りである。(様々なサイトを見たが、TF-IDFの式は細かいところで表記揺れがよくある。大まかなところは共通しているので、参考程度に見ていただきたい。)"] }), "\n", _jsx(_components.div, { className: "math math-display", children: _jsx(_components.span, { className: "katex-display", children: _jsxs(_components.span, { className: "katex", children: [_jsx(_components.span, { className: "katex-mathml", children: _jsx(_components.math, { xmlns: "http://www.w3.org/1998/Math/MathML", display: "block", children: _jsxs(_components.semantics, { children: [_jsxs(_components.mrow, { children: [_jsx(_components.mi, { children: "T" }), _jsx(_components.mi, { children: "F" }), _jsx(_components.mi, { children: "I" }), _jsx(_components.mi, { children: "D" }), _jsx(_components.mi, { children: "F" }), _jsx(_components.mo, { stretchy: "false", children: "(" }), _jsx(_components.mi, { children: "t" }), _jsx(_components.mo, { separator: "true", children: "," }), _jsx(_components.mi, { children: "d" }), _jsx(_components.mo, { stretchy: "false", children: ")" }), _jsx(_components.mo, { children: "=" }), _jsx(_components.mi, { children: "T" }), _jsx(_components.mi, { children: "F" }), _jsx(_components.mo, { stretchy: "false", children: "(" }), _jsx(_components.mi, { children: "t" }), _jsx(_components.mo, { separator: "true", children: "," }), _jsx(_components.mi, { children: "d" }), _jsx(_components.mo, { stretchy: "false", children: ")" }), _jsx(_components.mo, { children: "×" }), _jsx(_components.mi, { children: "I" }), _jsx(_components.mi, { children: "D" }), _jsx(_components.mi, { children: "F" }), _jsx(_components.mo, { stretchy: "false", children: "(" }), _jsx(_components.mi, { children: "t" }), _jsx(_components.mo, { stretchy: "false", children: ")" })] }), _jsx(_components.annotation, { encoding: "application/x-tex", children: " TFIDF(t,d)=TF(t,d) \\times IDF(t) " })] }) }) }), _jsxs(_components.span, { className: "katex-html", "aria-hidden": "true", children: [_jsxs(_components.span, { className: "base", children: [_jsx(_components.span, { className: "strut", style: { height: "1em", verticalAlign: "-0.25em" } }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.13889em" }, children: "TF" }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.07847em" }, children: "I" }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.02778em" }, children: "D" }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.13889em" }, children: "F" }), _jsx(_components.span, { className: "mopen", children: "(" }), _jsx(_components.span, { className: "mord mathnormal", children: "t" }), _jsx(_components.span, { className: "mpunct", children: "," }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.1667em" } }), _jsx(_components.span, { className: "mord mathnormal", children: "d" }), _jsx(_components.span, { className: "mclose", children: ")" }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2778em" } }), _jsx(_components.span, { className: "mrel", children: "=" }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2778em" } })] }), _jsxs(_components.span, { className: "base", children: [_jsx(_components.span, { className: "strut", style: { height: "1em", verticalAlign: "-0.25em" } }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.13889em" }, children: "TF" }), _jsx(_components.span, { className: "mopen", children: "(" }), _jsx(_components.span, { className: "mord mathnormal", children: "t" }), _jsx(_components.span, { className: "mpunct", children: "," }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.1667em" } }), _jsx(_components.span, { className: "mord mathnormal", children: "d" }), _jsx(_components.span, { className: "mclose", children: ")" }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2222em" } }), _jsx(_components.span, { className: "mbin", children: "×" }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2222em" } })] }), _jsxs(_components.span, { className: "base", children: [_jsx(_components.span, { className: "strut", style: { height: "1em", verticalAlign: "-0.25em" } }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.07847em" }, children: "I" }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.02778em" }, children: "D" }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.13889em" }, children: "F" }), _jsx(_components.span, { className: "mopen", children: "(" }), _jsx(_components.span, { className: "mord mathnormal", children: "t" }), _jsx(_components.span, { className: "mclose", children: ")" })] })] })] }) }) }), "\n", _jsx(_components.p, { children: "ただし、" }), "\n", _jsx(_components.div, { className: "math math-display", children: _jsx(_components.span, { className: "katex-display", children: _jsxs(_components.span, { className: "katex", children: [_jsx(_components.span, { className: "katex-mathml", children: _jsx(_components.math, { xmlns: "http://www.w3.org/1998/Math/MathML", display: "block", children: _jsxs(_components.semantics, { children: [_jsxs(_components.mrow, { children: [_jsx(_components.mi, { children: "T" }), _jsx(_components.mi, { children: "F" }), _jsx(_components.mo, { stretchy: "false", children: "(" }), _jsx(_components.mi, { children: "t" }), _jsx(_components.mo, { separator: "true", children: "," }), _jsx(_components.mi, { children: "d" }), _jsx(_components.mo, { stretchy: "false", children: ")" }), _jsx(_components.mo, { children: "=" }), _jsxs(_components.mfrac, { children: [_jsxs(_components.mrow, { children: [_jsx(_components.mtext, { children: "文章" }), _jsx(_components.mi, { children: "d" }), _jsx(_components.mtext, { children: "の中の単語" }), _jsx(_components.mi, { children: "t" }), _jsx(_components.mtext, { children: "の出現数" })] }), _jsxs(_components.mrow, { children: [_jsx(_components.mtext, { children: "文書" }), _jsx(_components.mi, { children: "d" }), _jsx(_components.mtext, { children: "の単語総数" })] })] }), _jsx(_components.mspace, { linebreak: "newline" }), _jsx(_components.mi, { children: "I" }), _jsx(_components.mi, { children: "D" }), _jsx(_components.mi, { children: "F" }), _jsx(_components.mo, { stretchy: "false", children: "(" }), _jsx(_components.mi, { children: "t" }), _jsx(_components.mo, { stretchy: "false", children: ")" }), _jsx(_components.mo, { children: "=" }), _jsxs(_components.msub, { children: [_jsxs(_components.mrow, { children: [_jsx(_components.mi, { children: "log" }), _jsx(_components.mo, { children: "⁡" })] }), _jsx(_components.mn, { children: "2" })] }), _jsxs(_components.mrow, { children: [_jsx(_components.mo, { fence: "true", children: "(" }), _jsxs(_components.mfrac, { children: [_jsx(_components.mtext, { children: "全文書数" }), _jsxs(_components.mrow, { children: [_jsx(_components.mtext, { children: "単語" }), _jsx(_components.mi, { children: "t" }), _jsx(_components.mtext, { children: "が出現した文書数" })] })] }), _jsx(_components.mo, { fence: "true", children: ")" })] }), _jsx(_components.mo, { children: "+" }), _jsx(_components.mn, { children: "1" })] }), _jsx(_components.annotation, { encoding: "application/x-tex", children: " TF(t,d)=\\frac{文章dの中の単語tの出現数}{文書dの単語総数}\\\\ IDF(t)=\\log_2\\left(\\frac{全文書数}{単語tが出現した文書数}\\right)+1 " })] }) }) }), _jsxs(_components.span, { className: "katex-html", "aria-hidden": "true", children: [_jsxs(_components.span, { className: "base", children: [_jsx(_components.span, { className: "strut", style: { height: "1em", verticalAlign: "-0.25em" } }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.13889em" }, children: "TF" }), _jsx(_components.span, { className: "mopen", children: "(" }), _jsx(_components.span, { className: "mord mathnormal", children: "t" }), _jsx(_components.span, { className: "mpunct", children: "," }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.1667em" } }), _jsx(_components.span, { className: "mord mathnormal", children: "d" }), _jsx(_components.span, { className: "mclose", children: ")" }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2778em" } }), _jsx(_components.span, { className: "mrel", children: "=" }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2778em" } })] }), _jsxs(_components.span, { className: "base", children: [_jsx(_components.span, { className: "strut", style: { height: "2.0574em", verticalAlign: "-0.686em" } }), _jsxs(_components.span, { className: "mord", children: [_jsx(_components.span, { className: "mopen nulldelimiter" }), _jsx(_components.span, { className: "mfrac", children: _jsxs(_components.span, { className: "vlist-t vlist-t2", children: [_jsxs(_components.span, { className: "vlist-r", children: [_jsxs(_components.span, { className: "vlist", style: { height: "1.3714em" }, children: [_jsxs(_components.span, { style: { top: "-2.314em" }, children: [_jsx(_components.span, { className: "pstrut", style: { height: "3em" } }), _jsxs(_components.span, { className: "mord", children: [_jsx(_components.span, { className: "mord cjk_fallback", children: "文書" }), _jsx(_components.span, { className: "mord mathnormal", children: "d" }), _jsx(_components.span, { className: "mord cjk_fallback", children: "の単語総数" })] })] }), _jsxs(_components.span, { style: { top: "-3.23em" }, children: [_jsx(_components.span, { className: "pstrut", style: { height: "3em" } }), _jsx(_components.span, { className: "frac-line", style: { borderBottomWidth: "0.04em" } })] }), _jsxs(_components.span, { style: { top: "-3.677em" }, children: [_jsx(_components.span, { className: "pstrut", style: { height: "3em" } }), _jsxs(_components.span, { className: "mord", children: [_jsx(_components.span, { className: "mord cjk_fallback", children: "文章" }), _jsx(_components.span, { className: "mord mathnormal", children: "d" }), _jsx(_components.span, { className: "mord cjk_fallback", children: "の中の単語" }), _jsx(_components.span, { className: "mord mathnormal", children: "t" }), _jsx(_components.span, { className: "mord cjk_fallback", children: "の出現数" })] })] })] }), _jsx(_components.span, { className: "vlist-s", children: "​" })] }), _jsx(_components.span, { className: "vlist-r", children: _jsx(_components.span, { className: "vlist", style: { height: "0.686em" }, children: _jsx(_components.span, {}) }) })] }) }), _jsx(_components.span, { className: "mclose nulldelimiter" })] })] }), _jsx(_components.span, { className: "mspace newline" }), _jsxs(_components.span, { className: "base", children: [_jsx(_components.span, { className: "strut", style: { height: "1em", verticalAlign: "-0.25em" } }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.07847em" }, children: "I" }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.02778em" }, children: "D" }), _jsx(_components.span, { className: "mord mathnormal", style: { marginRight: "0.13889em" }, children: "F" }), _jsx(_components.span, { className: "mopen", children: "(" }), _jsx(_components.span, { className: "mord mathnormal", children: "t" }), _jsx(_components.span, { className: "mclose", children: ")" }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2778em" } }), _jsx(_components.span, { className: "mrel", children: "=" }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2778em" } })] }), _jsxs(_components.span, { className: "base", children: [_jsx(_components.span, { className: "strut", style: { height: "2.4em", verticalAlign: "-0.95em" } }), _jsxs(_components.span, { className: "mop", children: [_jsxs(_components.span, { className: "mop", children: ["lo", _jsx(_components.span, { style: { marginRight: "0.01389em" }, children: "g" })] }), _jsx(_components.span, { className: "msupsub", children: _jsxs(_components.span, { className: "vlist-t vlist-t2", children: [_jsxs(_components.span, { className: "vlist-r", children: [_jsx(_components.span, { className: "vlist", style: { height: "0.207em" }, children: _jsxs(_components.span, { style: { top: "-2.4559em", marginRight: "0.05em" }, children: [_jsx(_components.span, { className: "pstrut", style: { height: "2.7em" } }), _jsx(_components.span, { className: "sizing reset-size6 size3 mtight", children: _jsx(_components.span, { className: "mord mtight", children: "2" }) })] }) }), _jsx(_components.span, { className: "vlist-s", children: "​" })] }), _jsx(_components.span, { className: "vlist-r", children: _jsx(_components.span, { className: "vlist", style: { height: "0.2441em" }, children: _jsx(_components.span, {}) }) })] }) })] }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.1667em" } }), _jsxs(_components.span, { className: "minner", children: [_jsx(_components.span, { className: "mopen delimcenter", style: { top: "0em" }, children: _jsx(_components.span, { className: "delimsizing size3", children: "(" }) }), _jsxs(_components.span, { className: "mord", children: [_jsx(_components.span, { className: "mopen nulldelimiter" }), _jsx(_components.span, { className: "mfrac", children: _jsxs(_components.span, { className: "vlist-t vlist-t2", children: [_jsxs(_components.span, { className: "vlist-r", children: [_jsxs(_components.span, { className: "vlist", style: { height: "1.3603em" }, children: [_jsxs(_components.span, { style: { top: "-2.314em" }, children: [_jsx(_components.span, { className: "pstrut", style: { height: "3em" } }), _jsxs(_components.span, { className: "mord", children: [_jsx(_components.span, { className: "mord cjk_fallback", children: "単語" }), _jsx(_components.span, { className: "mord mathnormal", children: "t" }), _jsx(_components.span, { className: "mord cjk_fallback", children: "が出現した文書数" })] })] }), _jsxs(_components.span, { style: { top: "-3.23em" }, children: [_jsx(_components.span, { className: "pstrut", style: { height: "3em" } }), _jsx(_components.span, { className: "frac-line", style: { borderBottomWidth: "0.04em" } })] }), _jsxs(_components.span, { style: { top: "-3.677em" }, children: [_jsx(_components.span, { className: "pstrut", style: { height: "3em" } }), _jsx(_components.span, { className: "mord", children: _jsx(_components.span, { className: "mord cjk_fallback", children: "全文書数" }) })] })] }), _jsx(_components.span, { className: "vlist-s", children: "​" })] }), _jsx(_components.span, { className: "vlist-r", children: _jsx(_components.span, { className: "vlist", style: { height: "0.686em" }, children: _jsx(_components.span, {}) }) })] }) }), _jsx(_components.span, { className: "mclose nulldelimiter" })] }), _jsx(_components.span, { className: "mclose delimcenter", style: { top: "0em" }, children: _jsx(_components.span, { className: "delimsizing size3", children: ")" }) })] }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2222em" } }), _jsx(_components.span, { className: "mbin", children: "+" }), _jsx(_components.span, { className: "mspace", style: { marginRight: "0.2222em" } })] }), _jsxs(_components.span, { className: "base", children: [_jsx(_components.span, { className: "strut", style: { height: "0.6444em" } }), _jsx(_components.span, { className: "mord", children: "1" })] })] })] }) }) }), "\n", _jsx(_components.p, { children: "簡単に言えば、TFは1文章中の単語の頻度、IDFは全文章中、単語が含まれる文書の頻度の逆数(にlogをとって1を足したもの)である。" }), "\n", _jsx(_components.p, { children: "TF-IDFを計算した結果を以下の表にまとめた。" }), "\n", _jsxs(_components.table, { children: [_jsx(_components.thead, { children: _jsxs(_components.tr, { children: [_jsx(_components.th, { children: "文章" }), _jsx(_components.th, { children: "私" }), _jsx(_components.th, { children: "リンゴ" }), _jsx(_components.th, { children: "食べる" }), _jsx(_components.th, { children: "バナナ" }), _jsx(_components.th, { children: "買う" })] }) }), _jsxs(_components.tbody, { children: [_jsxs(_components.tr, { children: [_jsx(_components.td, { children: "私はリンゴを食べる。" }), _jsx(_components.td, { children: "0.33" }), _jsx(_components.td, { children: "0.53" }), _jsx(_components.td, { children: "0.53" }), _jsx(_components.td, { children: "0" }), _jsx(_components.td, { children: "0" })] }), _jsxs(_components.tr, { children: [_jsx(_components.td, { children: "私はバナナを食べる。" }), _jsx(_components.td, { children: "0.33" }), _jsx(_components.td, { children: "0" }), _jsx(_components.td, { children: "0.53" }), _jsx(_components.td, { children: "0.86" }), _jsx(_components.td, { children: "0" })] }), _jsxs(_components.tr, { children: [_jsx(_components.td, { children: "私はリンゴを買う。" }), _jsx(_components.td, { children: "0.33" }), _jsx(_components.td, { children: "0.53" }), _jsx(_components.td, { children: "0" }), _jsx(_components.td, { children: "0" }), _jsx(_components.td, { children: "0.86" })] })] })] }), "\n", _jsx(_components.p, { children: "文章全てに出てきている「私」より、「リンゴ」、「バナナ」のほうが値が高くなっている。" }), "\n", _jsx(_components.p, { children: _jsx(_components.strong, { children: "TF-IDFを使用すれば、他の文章では出てこない特徴のある単語の値を大きくすることができる。" }) }), "\n", _jsx(_components.h3, { children: "bag-of-wordsの実装" }), "\n", _jsx(_components.p, { children: "実際にpythonでbag-of-wordsとTF-IDFの実装を試みる。使うモジュールはMeCabとgensim(とpandas)である。" }), "\n", _jsx(_components.p, { children: "まずは、形態素解析を実施して、使用する単語を抽出する。最初に必要なモジュールと関数を定義する。" }), "\n", _jsx(_components.pre, { children: _jsxs(_components.code, { className: "hljs language-python", children: [_jsx(_components.span, { className: "hljs-keyword", children: "import" }), " MeCab\n", _jsx(_components.span, { className: "hljs-keyword", children: "import" }), " pandas ", _jsx(_components.span, { className: "hljs-keyword", children: "as" }), " pd\n", _jsx(_components.span, { className: "hljs-keyword", children: "import" }), " gensim\n\n", _jsx(_components.span, { className: "hljs-keyword", children: "def" }), " ", _jsx(_components.span, { className: "hljs-title function_", children: "parse" }), "(", _jsx(_components.span, { className: "hljs-params", children: "tweet" }), "):\n t_list=[]\n t=MeCab.Tagger()\n temp1=t.parse(tweet)\n temp2=temp1.split(", _jsx(_components.span, { className: "hljs-string", children: "\"\\n\"" }), ")\n ", _jsx(_components.span, { className: "hljs-keyword", children: "for" }), " word ", _jsx(_components.span, { className: "hljs-keyword", children: "in" }), " temp2:\n ", _jsx(_components.span, { className: "hljs-keyword", children: "if" }), " word ", _jsx(_components.span, { className: "hljs-keyword", children: "not" }), " ", _jsx(_components.span, { className: "hljs-keyword", children: "in" }), " [", _jsx(_components.span, { className: "hljs-string", children: "\"EOS\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"\"" }), "]: \n word_sp=word.split(", _jsx(_components.span, { className: "hljs-string", children: "\"\\t\"" }), ")\n word_sp=word_sp[:", _jsx(_components.span, { className: "hljs-number", children: "1" }), "]+word_sp[", _jsx(_components.span, { className: "hljs-number", children: "1" }), "].split(", _jsx(_components.span, { className: "hljs-string", children: "\",\"" }), ")[:", _jsx(_components.span, { className: "hljs-number", children: "7" }), "]\n t_list.append(word_sp)\n ", _jsx(_components.span, { className: "hljs-keyword", children: "return" }), " t_list\n"] }) }), "\n", _jsx(_components.p, { children: "parseの実行結果はこんな感じ。" }), "\n", _jsx(_components.pre, { children: _jsxs(_components.code, { className: "hljs language-python", children: ["parse_doc=parse(", _jsx(_components.span, { className: "hljs-string", children: "\"私はリンゴを食べる。\"" }), ") parse_doc\n"] }) }), "\n", _jsx(_components.p, { children: "結果" }), "\n", _jsx(_components.pre, { children: _jsx(_components.code, { className: "hljs language-text", children: "[['私', '名詞', '代名詞', '一般', '*', '*', '*', '私'], ['は', '助詞', '係助詞', '*', '*', '*', '*', 'は'], ['リンゴ', '名詞', '一般', '*', '*', '*', '*', 'リンゴ'], ['を', '助詞', '格助詞', '一般', '*', '*', '*', 'を'], ['食べる', '動詞', '自立', '*', '*', '一段', '基本形', '食べる'], ['。', '記号', '句点', '*', '*', '*', '*', '。']]\n" }) }), "\n", _jsx(_components.p, { children: "品詞情報までリストに格納している。これを使用して、テキストから名詞、動詞、形容詞を抽出する。" }), "\n", _jsx(_components.pre, { children: _jsxs(_components.code, { className: "hljs language-python", children: ["texts=[", _jsx(_components.span, { className: "hljs-string", children: "\"私はリンゴを食べる。\"" }), ", ", _jsx(_components.span, { className: "hljs-string", children: "\"私はバナナを食べる。\"" }), ", ", _jsx(_components.span, { className: "hljs-string", children: "\"私はリンゴを買う。\"" }), "]\ndocs=[[w[", _jsx(_components.span, { className: "hljs-number", children: "7" }), "]\n", _jsx(_components.span, { className: "hljs-keyword", children: "for" }), " w ", _jsx(_components.span, { className: "hljs-keyword", children: "in" }), " parse(text)\n", _jsx(_components.span, { className: "hljs-keyword", children: "if" }), " w[", _jsx(_components.span, { className: "hljs-number", children: "1" }), "] ", _jsx(_components.span, { className: "hljs-keyword", children: "in" }), " [", _jsx(_components.span, { className: "hljs-string", children: "\"名詞\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"動詞\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"形容詞\"" }), "]]\n", _jsx(_components.span, { className: "hljs-keyword", children: "for" }), " text ", _jsx(_components.span, { className: "hljs-keyword", children: "in" }), "\ntexts] docs\n"] }) }), "\n", _jsx(_components.p, { children: "結果" }), "\n", _jsx(_components.pre, { children: _jsx(_components.code, { className: "hljs language-text", children: "[['私', 'リンゴ', '食べる'], ['私', 'バナナ', '食べる'], ['私', 'リンゴ', '買う']]\n" }) }), "\n", _jsx(_components.p, { children: "次に、gensimを使用してbowを計算する。gensimを使用する際の基本的な流れは、以下の通りである。" }), "\n", _jsxs(_components.ol, { children: ["\n", _jsx(_components.li, { children: "辞書を作成し、bowを計算し、コーパスを作成" }), "\n"] }), "\n", _jsx(_components.pre, { children: _jsxs(_components.code, { className: "hljs language-python", children: ["dictionary = gensim.corpora.Dictionary(docs)\ncorpus = [dictionary.doc2bow(doc) ", _jsx(_components.span, { className: "hljs-keyword", children: "for" }), " doc ", _jsx(_components.span, { className: "hljs-keyword", children: "in" }), " docs]\n"] }) }), "\n", _jsxs(_components.ol, { start: "3", children: ["\n", _jsx(_components.li, { children: "コーパスを使用してベクトル化を行う。" }), "\n"] }), "\n", _jsx(_components.pre, { children: _jsxs(_components.code, { className: "hljs language-python", children: ["vector_bow=gensim.matutils.corpus2dense(corpus, num_terms=", _jsx(_components.span, { className: "hljs-built_in", children: "len" }), "(dictionary)).T ", _jsx(_components.span, { className: "hljs-comment", children: "# これを入れないとdictionary.id2tokenが生成されない。 dictionary[0]" }), "\nword=[dictionary.id2token[i]\n\n\n", _jsx(_components.span, { className: "hljs-keyword", children: "for" }), " i ", _jsx(_components.span, { className: "hljs-keyword", children: "in" }), " ", _jsx(_components.span, { className: "hljs-built_in", children: "range" }), "(", _jsx(_components.span, { className: "hljs-built_in", children: "len" }), "(dictionary))] df_bow=\npd.DataFrame(vector_bow,columns=word)[[", _jsx(_components.span, { className: "hljs-string", children: "\"私\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"リンゴ\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"食べる\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"バナナ\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"買う\"" }), "]] df_bow\n"] }) }), "\n", _jsx(_components.p, { children: "結果" }), "\n", _jsx(_components.p, { children: _jsx(_components.img, { src: "/images/blog13/bow1.png", alt: "bow1", title: "bow1" }) }), "\n", _jsx(_components.p, { children: "gensimを使用すれば、簡単にbowを使用することができる。" }), "\n", _jsx(_components.h3, { children: "TF-IDFの実装" }), "\n", _jsx(_components.p, { children: "gensimを使用すると、TF-IDFも簡単に実装できる。しかし、上記で示したTF-IDFの式とは少し違うので、合わせるには少しカスタマイズする必要がある。" }), "\n", _jsxs(_components.ol, { children: ["\n", _jsx(_components.li, { children: "TF-IDFを計算し、コーパスを作成" }), "\n"] }), "\n", _jsx(_components.pre, { children: _jsxs(_components.code, { className: "hljs language-python", children: [_jsx(_components.span, { className: "hljs-comment", children: "# gensimではTF=単語頻度より、文章中の全単語数を割る必要がある。" }), "\n", _jsx(_components.span, { className: "hljs-keyword", children: "def" }), " ", _jsx(_components.span, { className: "hljs-title function_", children: "wlocal" }), "(", _jsx(_components.span, { className: "hljs-params", children: "tf" }), "):\n ", _jsx(_components.span, { className: "hljs-keyword", children: "return" }), " tf/tf.", _jsx(_components.span, { className: "hljs-built_in", children: "sum" }), "() ", _jsx(_components.span, { className: "hljs-comment", children: "# gensimではIDFに1を足していないので、「add=1」を加える。" }), "\n ", _jsx(_components.span, { className: "hljs-keyword", children: "def" }), " ", _jsx(_components.span, { className: "hljs-title function_", children: "new_df2idf" }), "(", _jsx(_components.span, { className: "hljs-params", children: "docfreq, totaldocs" }), "):\n ", _jsx(_components.span, { className: "hljs-keyword", children: "return" }), " gensim.models.tfidfmodel.df2idf(docfreq, totaldocs,add=", _jsx(_components.span, { className: "hljs-number", children: "1" }), ") tfidf = gensim.models.TfidfModel(corpus,normalize = ", _jsx(_components.span, { className: "hljs-literal", children: "False" }), ",wlocal=wlocal,wglobal=new_df2idf)\n corpus_tfidf = tfidf[corpus]\n"] }) }), "\n", _jsxs(_components.ol, { start: "3", children: ["\n", _jsx(_components.li, { children: "コーパスを使用してベクトル化を行う。" }), "\n"] }), "\n", _jsx(_components.pre, { children: _jsxs(_components.code, { className: "hljs language-python", children: ["word=[dictionary.id2token[i]\n\n\n", _jsx(_components.span, { className: "hljs-keyword", children: "for" }), " i ", _jsx(_components.span, { className: "hljs-keyword", children: "in" }), " ", _jsx(_components.span, { className: "hljs-built_in", children: "range" }), "(", _jsx(_components.span, { className: "hljs-built_in", children: "len" }), "(dictionary))] vector_tfidf=gensim.matutils.corpus2dense(corpus_tfidf, num_terms=", _jsx(_components.span, { className: "hljs-built_in", children: "len" }), "(dictionary)).T df_tfidf=\npd.DataFrame(vector_tfidf,columns=word)[[", _jsx(_components.span, { className: "hljs-string", children: "\"私\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"リンゴ\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"食べる\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"バナナ\"" }), ",", _jsx(_components.span, { className: "hljs-string", children: "\"買う\"" }), "]] df_tfidf\n"] }) }), "\n", _jsx(_components.p, { children: "結果" }), "\n", _jsx(_components.p, { children: _jsx(_components.img, { src: "/images/blog13/bow2.png", alt: "bow2", title: "bow2" }) }), "\n", _jsx(_components.p, { children: "ちなみに、gensim.models.TfidfModelの引数のwlocalはTFに、wglobalはIDFに関数を適応させるための引数である。" }), "\n", _jsx(_components.p, { children: "次回はこのベクトルを使用して文章の類似度を測ってみる。" })] }); } function MDXContent(props = {}) { const {wrapper: MDXLayout} = Object.assign({}, _provideComponents(), props.components); return MDXLayout ? _jsx(MDXLayout, Object.assign({}, props, { children: _jsx(_createMdxContent, props) })) : _createMdxContent(props); } return { default: MDXContent }; a:["$","div",null,{"className":"container mx-auto px-4 py-8 max-w-7xl","children":["$","div",null,{"className":"flex flex-col md:flex-row gap-8","children":[["$","article",null,{"className":"w-full md:w-[70%] bg-white shadow-lg rounded-lg p-8","children":[["$","header",null,{"className":"mb-8 border-b pb-4","children":[["$","div",null,{"className":"flex items-center gap-2 mb-2","children":[["$","$L6",null,{"href":"/","className":"text-gray-500 hover:text-orange-500","children":"Home"}],["$","span",null,{"className":"text-gray-300","children":">"}],["$","span",null,{"className":"text-gray-500","children":"$undefined"}]]}],["$","h1",null,{"className":"text-3xl font-bold mb-4","children":"Pythonで文章の近さを計算しよう2(bag-of-words/TF-IDF)"}],["$","div",null,{"className":"text-gray-500 flex flex-wrap gap-4 items-center mb-6","children":[["$","time",null,{"className":"flex items-center gap-1 text-sm","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","height":"18px","viewBox":"0 0 24 24","width":"18px","fill":"#666666","className":"mr-1","children":[["$","path",null,{"d":"M0 0h24v24H0V0z","fill":"none"}],["$","path",null,{"d":"M19 3h-1V1h-2v2H8V1H6v2H5c-1.11 0-2 .9-2 2v14c0 1.1.89 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zm0 16H5V8h14v11zM7 10h5v5H7z"}]]}],"日付: ","2019-11-17"]}],["$","div",null,{"className":"flex flex-wrap gap-2","children":[["$","$L6","自然言語処理",{"href":"/tags/自然言語処理","className":"bg-gray-100 hover:bg-blue-100 px-2 py-1 rounded text-sm text-gray-600 hover:text-blue-600 transition-colors","children":["#","自然言語処理"]}]]}]]}]]}],["$","div",null,{"className":"prose max-w-none prose-headings:border-b prose-headings:pb-2 prose-a:text-orange-600","children":["$","$Lb",null,{"compiledSource":"$c","frontmatter":{},"scope":{}}]}],["$","div",null,{"className":"flex justify-between items-center mt-12","children":[["$","div",null,{"className":"flex-1","children":["$","$L6",null,{"href":"/blog16","className":"group flex flex-col items-start p-4 rounded-lg border border-gray-100 hover:border-orange-200 hover:bg-orange-50 transition-all","children":[["$","span",null,{"className":"text-sm text-gray-500 mb-1 group-hover:text-orange-600 flex items-center","children":[["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","className":"h-4 w-4 mr-1","fill":"none","viewBox":"0 0 24 24","stroke":"currentColor","children":["$","path",null,{"strokeLinecap":"round","strokeLinejoin":"round","strokeWidth":2,"d":"M15 19l-7-7 7-7"}]}],"Previous"]}],["$","span",null,{"className":"font-medium text-gray-800 dark:text-gray-200 line-clamp-2","children":"欠損ありデータの平均・分散共分散の推定1"}]]}]}],["$","div",null,{"className":"w-8"}],["$","div",null,{"className":"flex-1 text-right","children":["$","$L6",null,{"href":"/blog14","className":"group flex flex-col items-end p-4 rounded-lg border border-gray-100 hover:border-orange-200 hover:bg-orange-50 transition-all","children":[["$","span",null,{"className":"text-sm text-gray-500 mb-1 group-hover:text-orange-600 flex items-center","children":["Next",["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","className":"h-4 w-4 ml-1","fill":"none","viewBox":"0 0 24 24","stroke":"currentColor","children":["$","path",null,{"strokeLinecap":"round","strokeLinejoin":"round","strokeWidth":2,"d":"M9 5l7 7-7 7"}]}]]}],["$","span",null,{"className":"font-medium text-gray-800 dark:text-gray-200 line-clamp-2","children":"Pythonで文章の近さを計算しよう3(cos類似度)"}]]}]}]]}],["$","$Ld",null,{}]]}],["$","div",null,{"className":"w-full md:w-[30%]","children":"$Le"}]]}]}] 9:null f:I[9009,["326","static/chunks/326-ead410bae2047633.js","986","static/chunks/986-f27c5a2c4d841870.js","42","static/chunks/app/%5Bslug%5D/page-419f452b4066bb25.js"],""] e:["$","aside",null,{"className":"w-full flex flex-col gap-6","children":[["$","div",null,{"className":"bg-white p-6 rounded shadow-sm border","children":[["$","h3",null,{"className":"font-bold text-lg mb-4 text-[#494949] border-b pb-2","children":"プロフィール"}],["$","div",null,{"className":"flex items-center mb-4","children":[["$","$Lf",null,{}],["$","span",null,{"className":"font-bold","children":"Shingo.S"}]]}],["$","p",null,{"className":"mb-4 text-base leading-relaxed text-gray-600","children":["データサイエンティストとして働いています。仕事では主にPythonやSAS、Rを用いて分析しています。",["$","br",null,{}],["$","br",null,{}],"統計検定1級、Kaggle Expert。",["$","br",null,{}],"自然言語処理に興味があります。"]}],["$","div",null,{"className":"mt-4 flex items-center","children":["$","a",null,{"href":"https://x.com/shingo97358922","target":"_blank","rel":"noopener noreferrer","className":"flex items-center gap-2 text-gray-600 hover:text-black transition-colors","children":[["$","svg",null,{"className":"w-5 h-5","fill":"currentColor","viewBox":"0 0 24 24","aria-hidden":"true","children":["$","path",null,{"d":"M18.244 2.25h3.308l-7.227 8.26 8.502 11.24H16.17l-5.214-6.817L4.99 21.75H1.68l7.73-8.835L1.254 2.25H8.08l4.713 6.231zm-1.161 17.52h1.833L7.084 4.126H5.117z"}]}],["$","span",null,{"className":"text-sm font-medium","children":"@shingo97358922"}]]}]}]]}],["$","div",null,{"className":"bg-white p-6 rounded shadow-sm border","children":[["$","h3",null,{"className":"font-bold text-lg mb-4 text-[#494949] border-b pb-2","children":"アクセスカウンター"}],["$","div",null,{"className":"flex flex-col gap-2 text-gray-600 pl-2","children":[["$","div",null,{"className":"flex items-center gap-4","children":[["$","span",null,{"className":"w-12 text-right","children":"累計"}],["$","span",null,{"children":":"}],["$","span",null,{"className":"font-mono text-lg font-bold","children":"72320"}]]}],["$","div",null,{"className":"flex items-center gap-4","children":[["$","span",null,{"className":"w-12 text-right","children":"本日"}],["$","span",null,{"children":":"}],["$","span",null,{"className":"font-mono text-lg font-bold","children":"17"}]]}],["$","div",null,{"className":"flex items-center gap-4","children":[["$","span",null,{"className":"w-12 text-right","children":"昨日"}],["$","span",null,{"children":":"}],["$","span",null,{"className":"font-mono text-lg font-bold","children":"33"}]]}]]}]]}],["$","div",null,{"className":"bg-white p-6 rounded shadow-sm border","children":[["$","h3",null,{"className":"font-bold text-lg mb-4 text-[#494949] border-b pb-2","children":"月別アーカイブ"}],["$","ul",null,{"className":"text-base text-gray-600","children":[["$","li","2026年2月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2026-02","className":"hover:text-blue-500 transition-colors block","children":["2026年2月"," (",1,")"]}]}],["$","li","2025年2月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2025-02","className":"hover:text-blue-500 transition-colors block","children":["2025年2月"," (",1,")"]}]}],["$","li","2024年10月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2024-10","className":"hover:text-blue-500 transition-colors block","children":["2024年10月"," (",1,")"]}]}],["$","li","2024年7月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2024-07","className":"hover:text-blue-500 transition-colors block","children":["2024年7月"," (",1,")"]}]}],["$","li","2024年2月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2024-02","className":"hover:text-blue-500 transition-colors block","children":["2024年2月"," (",1,")"]}]}],["$","li","2023年6月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2023-06","className":"hover:text-blue-500 transition-colors block","children":["2023年6月"," (",1,")"]}]}],["$","li","2023年5月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2023-05","className":"hover:text-blue-500 transition-colors block","children":["2023年5月"," (",1,")"]}]}],["$","li","2023年3月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2023-03","className":"hover:text-blue-500 transition-colors block","children":["2023年3月"," (",2,")"]}]}],["$","li","2022年10月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2022-10","className":"hover:text-blue-500 transition-colors block","children":["2022年10月"," (",2,")"]}]}],["$","li","2022年9月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2022-09","className":"hover:text-blue-500 transition-colors block","children":["2022年9月"," (",2,")"]}]}],["$","li","2022年7月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2022-07","className":"hover:text-blue-500 transition-colors block","children":["2022年7月"," (",1,")"]}]}],["$","li","2022年3月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2022-03","className":"hover:text-blue-500 transition-colors block","children":["2022年3月"," (",1,")"]}]}],["$","li","2021年11月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2021-11","className":"hover:text-blue-500 transition-colors block","children":["2021年11月"," (",1,")"]}]}],["$","li","2021年10月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2021-10","className":"hover:text-blue-500 transition-colors block","children":["2021年10月"," (",2,")"]}]}],["$","li","2021年7月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2021-07","className":"hover:text-blue-500 transition-colors block","children":["2021年7月"," (",2,")"]}]}],["$","li","2021年5月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2021-05","className":"hover:text-blue-500 transition-colors block","children":["2021年5月"," (",2,")"]}]}],["$","li","2021年4月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2021-04","className":"hover:text-blue-500 transition-colors block","children":["2021年4月"," (",2,")"]}]}],["$","li","2021年2月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2021-02","className":"hover:text-blue-500 transition-colors block","children":["2021年2月"," (",1,")"]}]}],["$","li","2020年10月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2020-10","className":"hover:text-blue-500 transition-colors block","children":["2020年10月"," (",1,")"]}]}],["$","li","2020年9月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2020-09","className":"hover:text-blue-500 transition-colors block","children":["2020年9月"," (",1,")"]}]}],["$","li","2020年7月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2020-07","className":"hover:text-blue-500 transition-colors block","children":["2020年7月"," (",1,")"]}]}],["$","li","2020年6月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2020-06","className":"hover:text-blue-500 transition-colors block","children":["2020年6月"," (",1,")"]}]}],["$","li","2020年5月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2020-05","className":"hover:text-blue-500 transition-colors block","children":["2020年5月"," (",1,")"]}]}],["$","li","2020年4月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2020-04","className":"hover:text-blue-500 transition-colors block","children":["2020年4月"," (",1,")"]}]}],["$","li","2020年3月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2020-03","className":"hover:text-blue-500 transition-colors block","children":["2020年3月"," (",1,")"]}]}],["$","li","2020年2月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2020-02","className":"hover:text-blue-500 transition-colors block","children":["2020年2月"," (",1,")"]}]}],["$","li","2020年1月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2020-01","className":"hover:text-blue-500 transition-colors block","children":["2020年1月"," (",1,")"]}]}],["$","li","2019年12月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2019-12","className":"hover:text-blue-500 transition-colors block","children":["2019年12月"," (",1,")"]}]}],["$","li","2019年11月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2019-11","className":"hover:text-blue-500 transition-colors block","children":["2019年11月"," (",2,")"]}]}],["$","li","2019年10月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2019-10","className":"hover:text-blue-500 transition-colors block","children":["2019年10月"," (",2,")"]}]}],["$","li","2019年9月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2019-09","className":"hover:text-blue-500 transition-colors block","children":["2019年9月"," (",1,")"]}]}],["$","li","2019年8月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2019-08","className":"hover:text-blue-500 transition-colors block","children":["2019年8月"," (",8,")"]}]}],["$","li","2019年7月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2019-07","className":"hover:text-blue-500 transition-colors block","children":["2019年7月"," (",1,")"]}]}],["$","li","2019年6月",{"className":"mb-2 border-b border-dashed border-gray-200 pb-1 last:border-none","children":["$","$L6",null,{"href":"/archives/2019-06","className":"hover:text-blue-500 transition-colors block","children":["2019年6月"," (",2,")"]}]}]]}]]}],["$","div",null,{"className":"bg-white p-6 rounded shadow-sm border","children":[["$","h3",null,{"className":"font-bold text-lg mb-4 text-[#494949] border-b pb-2","children":"タグ一覧"}],["$","div",null,{"className":"flex flex-wrap gap-2","children":[["$","$L6","データ分析",{"href":"/tags/データ分析","className":"bg-gray-100 hover:bg-blue-500 hover:text-white rounded px-2 py-1 text-sm transition-colors duration-200 text-gray-600","children":["データ分析"," (",18,")"]}],["$","$L6","自然言語処理",{"href":"/tags/自然言語処理","className":"bg-gray-100 hover:bg-blue-500 hover:text-white rounded px-2 py-1 text-sm transition-colors duration-200 text-gray-600","children":["自然言語処理"," (",16,")"]}],["$","$L6","SAS",{"href":"/tags/SAS","className":"bg-gray-100 hover:bg-blue-500 hover:text-white rounded px-2 py-1 text-sm transition-colors duration-200 text-gray-600","children":["SAS"," (",12,")"]}],["$","$L6","Kaggle",{"href":"/tags/Kaggle","className":"bg-gray-100 hover:bg-blue-500 hover:text-white rounded px-2 py-1 text-sm transition-colors duration-200 text-gray-600","children":["Kaggle"," (",3,")"]}],["$","$L6","雑談",{"href":"/tags/雑談","className":"bg-gray-100 hover:bg-blue-500 hover:text-white rounded px-2 py-1 text-sm transition-colors duration-200 text-gray-600","children":["雑談"," (",2,")"]}]]}]]}],["$","div",null,{"className":"bg-white p-6 rounded shadow-sm border","children":[["$","h3",null,{"className":"font-bold text-lg mb-4 text-[#494949] border-b pb-2","children":"広告枠"}],["$","div",null,{"className":"flex justify-center items-center","children":[["$","a",null,{"href":"https://px.a8.net/svt/ejp?a8mat=4AXI0F+CULTTE+348+6CWQP","rel":"nofollow","target":"_blank","children":["$","img",null,{"style":{"border":0},"width":"250","height":"250","alt":"","src":"https://www25.a8.net/svt/bgt?aid=260223855777&wid=001&eno=01&mid=s00000000404001068000&mc=1"}]}],["$","img",null,{"style":{"border":0},"width":"1","height":"1","src":"https://www17.a8.net/0.gif?a8mat=4AXI0F+CULTTE+348+6CWQP","alt":""}]]}]]}]]}]