import re
from urllib.parse import urlparse

# Split text into chunks of chunk_size characters (10000 by default).
def split_text(text, chunk_size=10000):
    """Split ``text`` into consecutive chunks of at most ``chunk_size`` items.

    Args:
        text: The string (or any other sliceable sequence) to split.
        chunk_size: Maximum length of each chunk. Must be a positive integer.

    Returns:
        A list of chunks in their original order. Every chunk has exactly
        ``chunk_size`` items except possibly the last one, which holds the
        remainder. An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step makes range() empty, which would silently drop all of
    # the text; reject non-positive sizes explicitly instead.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]

# Extract the member ID that follows "*" in the file URL (only the ID is returned).
def split_file_text(file_name):
    """Extract the ID that follows a ``*`` in ``file_name``.

    The ID is the run of non-whitespace characters immediately after the
    first ``*`` that is followed by at least one non-whitespace character.
    A message reporting the outcome is printed to stdout in both cases.

    Args:
        file_name: The string to search.

    Returns:
        The extracted ID string, or ``None`` when no ``*`` followed by a
        non-whitespace character is present.
    """
    found = re.search(r'\*(\S+)', file_name)

    # Guard clause: report the miss and bail out early.
    if found is None:
        print("Pigeon task IDが見つかりませんでした。")
        return None

    extracted_id = found.group(1)
    print(f"PigeonIDが見つかりました。{extracted_id}")
    return extracted_id

# Split the file URL into two parts: "host" and "path".
def split_url_host_and_path(url):
    """Split ``url`` into its origin and its path.

    Args:
        url: The URL to split; it is parsed with ``urllib.parse.urlparse``.

    Returns:
        A ``(host, path)`` tuple where ``host`` is ``"<scheme>://<netloc>"``
        and ``path`` is the parsed path component. The query string,
        fragment, and path parameters are not included in either element.
    """
    parts = urlparse(url)
    scheme, netloc = parts.scheme, parts.netloc
    # The origin is rebuilt from scheme and network location only.
    return f"{scheme}://{netloc}", parts.path