HTML
Installation
# you can use a Conda environment
pip install --extra-index-url https://oauth2accesstoken:$(gcloud auth print-access-token)@glsdk.gdplabs.id/gen-ai-internal/simple/ "gllm-docproc[html]"
# you can use a Conda environment
$token = (gcloud auth print-access-token)
pip install --extra-index-url "https://oauth2accesstoken:$token@glsdk.gdplabs.id/gen-ai-internal/simple/" "gllm-docproc[html]"
# you can use a Conda environment
FOR /F "tokens=*" %T IN ('gcloud auth print-access-token') DO SET TOKEN=%T
pip install --extra-index-url "https://oauth2accesstoken:%TOKEN%@glsdk.gdplabs.id/gen-ai-internal/simple/" "gllm-docproc[html]"
# Example script: load previously extracted elements from a JSON file and
# pass them to HTMLFlatParser.parse().
import json

from gllm_docproc.parser.html import HTMLFlatParser

# read JSON file with content and element_metadata
file_path = "./data/source/loaded_elements.json"
with open(file_path, "r", encoding="utf-8") as f:
    loaded_elements = json.load(f)

# initialize the HTML Flat Parser
parser = HTMLFlatParser()

# parse loaded elements
parsed_elements = parser.parse(loaded_elements)
python main.py
Last updated
Was this helpful?