diff --git a/.gitignore b/.gitignore
index 3e6122c..5cb5d8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@ __pycache__
 *.csv
 .vscode/*
 !.vscode/settings.json
-animes/*
\ No newline at end of file
+animes/*
+temp
\ No newline at end of file
diff --git a/anixarttierlist/__init__.py b/anixarttierlist/__init__.py
index b4efffc..48c89cb 100644
--- a/anixarttierlist/__init__.py
+++ b/anixarttierlist/__init__.py
@@ -1 +1,4 @@
+from anixarttierlist.api import download_image, get_image_url
 from anixarttierlist.group import group_by_common_part
+from anixarttierlist.prepare import prepare
+from anixarttierlist.tools import add_url_to_file
diff --git a/anixarttierlist/prepare.py b/anixarttierlist/prepare.py
new file mode 100644
index 0000000..4aec9f8
--- /dev/null
+++ b/anixarttierlist/prepare.py
@@ -0,0 +1,7 @@
+import os
+
+
+def prepare() -> None:
+    """Create the ``./temp`` working directory if it does not exist yet."""
+    # exist_ok avoids the check-then-create race of exists() + mkdir().
+    os.makedirs('./temp', exist_ok=True)
diff --git a/anixarttierlist/tools.py b/anixarttierlist/tools.py
new file mode 100644
index 0000000..8490e52
--- /dev/null
+++ b/anixarttierlist/tools.py
@@ -0,0 +1,5 @@
+def add_url_to_file(url: str, path: str = './temp/urls.txt') -> None:
+    """Append ``url`` as one line to the text file at ``path``."""
+    # Append-only mode is enough here: the file is never read back.
+    with open(path, 'a', encoding='utf-8') as file:
+        file.write(f'{url}\n')
diff --git a/main.py b/main.py
index 357fcdb..04386b0 100644
--- a/main.py
+++ b/main.py
@@ -1,9 +1,15 @@
 import csv
 import sys
+import time
+from concurrent.futures import ThreadPoolExecutor
 from enum import Enum
 from typing import List
 
-from anixarttierlist import group_by_common_part
+import numpy as np
+from tqdm import tqdm
+
+from anixarttierlist import (add_url_to_file, download_image, get_image_url,
+                             prepare)
 
 
 class TableColumns(Enum):
@@ -13,6 +19,49 @@ class TableColumns(Enum):
     ALT_NAME = 3
 
 
+def get_urls(names: List[List[str]], delay: float = 0.68) -> List[str]:
+    """Look up, record and download the image for every given name.
+
+    Each name is passed to ``get_image_url``; a non-empty result is
+    handed to ``add_url_to_file`` and downloaded immediately.
+
+    Args:
+        names: Groups (batches) of names to look up.
+        delay: Seconds to sleep after each lookup; presumably a rate
+            limit for the remote API (TODO confirm the required value).
+
+    Returns:
+        The non-empty URLs that were found, in lookup order.
+    """
+    urls: List[str] = []
+    for group in tqdm(names):
+        for name in tqdm(group):
+            url = get_image_url(name)
+            if url:
+                add_url_to_file(url)
+                download_image(url, name)
+                urls.append(url)
+            time.sleep(delay)
+    return urls
+
+
+def download_imgs(urls: List[str], names: List[str] | None = None) -> None:
+    """Download several images concurrently with five worker threads.
+
+    Args:
+        urls: Image URLs to download.
+        names: Optional names passed to ``download_image`` alongside the
+            URLs, matching the ``download_image(url, name)`` call used in
+            ``get_urls``. When omitted, only the URL is passed.
+    """
+    call_args = (urls,) if names is None else (urls, names)
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        # Executor.map() only re-raises a worker's exception when its
+        # result is retrieved, so drain the iterator to surface failures.
+        for _ in executor.map(download_image, *call_args):
+            pass
+
+
 def main(file: str):
     anime_list_before_group: List[str] = []
     with open(file, 'r', newline='') as csvfile:
@@ -24,7 +73,14 @@ def main(file: str):
                 anime_list_before_group.append(
                     row[TableColumns.JAP_NAME.value])
     # anime_list: List[str] = group_by_common_part(anime_list_before_group) # TODO: find the best algorithm for that
-    print(anime_list_before_group)
+    if not anime_list_before_group:
+        # np.array_split() raises ValueError for zero sections.
+        return
+    np_anime_list = np.array(anime_list_before_group)
+    # Split into batches of at most 5 names each.
+    sections = int(np.ceil(len(np_anime_list) / 5))
+    res = np.array_split(np_anime_list, sections)
+    get_urls(res)
 
 
 if __name__ == '__main__':
@@ -32,4 +88,5 @@ if __name__ == '__main__':
     if len(argv) != 2:
         print('Error! Run program with file name: \'python main.py file.csv\'')
         sys.exit(1)
+    prepare()
     sys.exit(main(argv[1]))
diff --git a/poetry.lock b/poetry.lock
index 3702b8a..2a84c9e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -112,6 +112,17 @@ files = [
     {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"},
 ]
 
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+files = [
+    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+
 [[package]]
 name = "idna"
 version = "3.10"
@@ -344,6 +355,27 @@ files = [
     {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"},
 ]
 
+[[package]]
+name = "tqdm"
+version = "4.67.1"
+description = "Fast, Extensible Progress Meter"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"},
+    {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"]
+discord = ["requests"]
+notebook = ["ipywidgets (>=6)"]
+slack = ["slack-sdk"]
+telegram = ["requests"]
+
 [[package]]
 name = "urllib3"
 version = "2.3.0"
@@ -364,4 +396,4 @@ zstd = ["zstandard (>=0.18.0)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.13"
-content-hash = "d5d8cef7bb5dd015d1e20b1411a7a4be69d328e39623fc03436d126fbbf08f30"
+content-hash = "d9ecdde32d7620058581bc8216cdd103b0436105b05e643a8167a48916c96bb1"
diff --git a/pyproject.toml b/pyproject.toml
index 6dac504..765712c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,7 @@ package-mode = false
 python = "^3.13"
 scikit-learn = "^1.6.0"
 requests = "^2.32.3"
+tqdm = "^4.67.1"
 
 
 [build-system]