This notebook fetches the original article from Hugging Face and recreates it locally so that all text, LaTeX equations, and images are rendered inside Markdown cells.
Just run the first code‑cell, wait a few seconds, and a new notebook named tgi_article.ipynb will be created in the same directory.
If you only want to read the article, open that generated notebook – this starter notebook simply performs the conversion for you.
# --- AUTOMATIC CONVERTER ------------------------------------------------------
# This cell downloads the article, converts the HTML → Markdown, extracts LaTeX
# equations, and writes a new notebook (`tgi_article.ipynb`) with one Markdown
# cell per section and images embedded via their original URLs.
# ------------------------------------------------------------------------------
import datetime
import re

import nbformat
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify as md

URL = "https://huggingface.co/blog/martinigoyanes/llm-inference-at-scale-with-tgi"

print(f"Fetching article from {URL} …")
resp = requests.get(URL, timeout=30)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")

# Grab the <article> element that holds the blog-post
article_tag = soup.find("article")
if article_tag is None:
    raise RuntimeError("Could not locate <article> tag – the page structure may have changed")

# Convert to Markdown – keep image tags intact so images render via their
# original URLs inside the generated Markdown cells.
raw_md = md(
    str(article_tag),
    heading_style="ATX",
    bullets="*",
    strip=["noscript", "script"],
    convert=["br", "hr", "li", "a", "img"],
)

# ------------------------------------------------------------
# Basic clean-ups: remove zero-width spaces, tidy multiple blank lines
# ------------------------------------------------------------
raw_md = raw_md.replace("\u200b", "")
raw_md = re.sub(r"\n{3,}", "\n\n", raw_md).strip()

# ------------------------------------------------------------
# Extract LaTeX-style equations written inline as plain text and
# wrap them in $$ … $$ so that Jupyter renders them.
# The pattern heuristically matches simple computations such as
# "(2 * N * B * S) / FLOPs rate =".
# ------------------------------------------------------------
def latexify(match: "re.Match[str]") -> str:
    """Wrap the entire matched expression in a display-math block."""
    expr = match.group(0)
    return f"$$\n{expr}\n$$"

pattern = re.compile(r"\([^\n]{6,80}?\)\s*/\s*[\w\-\(\)]+.*?=")
raw_md = pattern.sub(latexify, raw_md)

# Split into sections at H1 headings ("# " after a newline) so that each
# section becomes its own notebook Markdown cell. re.split consumes the
# "# " delimiter, so it is restored on every chunk except the first
# (anything preceding the first heading).
sections = re.split(r"(?<=\n)# ", raw_md)
md_cells = [
    sec.strip() if idx == 0 else "# " + sec.strip()
    for idx, sec in enumerate(sections)
]

nb = nbformat.v4.new_notebook()
nb["cells"] = [nbformat.v4.new_markdown_cell(cell) for cell in md_cells]
nb["metadata"]["created_from"] = URL
# Timezone-aware replacement for the deprecated datetime.utcnow(); emit a
# proper ISO-8601 UTC timestamp with the conventional "Z" suffix instead of
# str(utcnow()) + "Z" (which produced a space-separated, naive timestamp).
nb["metadata"]["generated_at"] = (
    datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")
)

out_path = "tgi_article.ipynb"
with open(out_path, "w", encoding="utf-8") as f:
    nbformat.write(nb, f)

print(f"\n✅ Finished! The converted notebook is saved as: {out_path}")
print("Open it to read the article with all images & equations rendered.")
---------------------------------------------------------------------------ModuleNotFoundError Traceback (most recent call last)
CellIn[1], line 7 1# --- AUTOMATIC CONVERTER ------------------------------------------------------ 2# This cell downloads the article, converts the HTML → Markdown, extracts LaTeX 3# equations, and writes a new notebook (`tgi_article.ipynb`) with one Markdown 4# cell per section and images embedded via their original URLs. 5# ----------------------------------------------------------------------------------> 7importrequests,re,os,json,nbformat,textwrap,datetime 8frombs4import BeautifulSoup
9frommarkdownifyimport markdownify as md
ModuleNotFoundError: No module named 'requests'
After the above cell runs successfully you should see tgi_article.ipynb appear in the file browser. If the cell instead fails with a ModuleNotFoundError (as in the traceback above), install the dependencies first — `pip install requests beautifulsoup4 markdownify nbformat` — and re-run it.
Open it to enjoy the fully‑formatted article – happy reading!