Compare commits

..

No commits in common. "339195fda694e5dee7b2f8a827e32b722e54434b" and "b6daacca2d2aa203316b8668a1ec95c8656eddb7" have entirely different histories.

5 changed files with 13 additions and 14 deletions

View File

@ -102,8 +102,8 @@ docker-compose -p recipe-test down
``` ```
Tests are written with the pytest framework. Currently focused on unit tests and code Tests are written with the pytest framework. Currently focused on unit tests.
coverage. Integration tests to come. Integration tests to come.
To run the tests use: To run the tests use:
```sh ```sh

View File

@ -10,6 +10,5 @@ dependencies = [
"SQLAlchemy==1.4.39", "SQLAlchemy==1.4.39",
"python-dotenv==0.20.0", "python-dotenv==0.20.0",
"beautifulsoup4==4.11.1", "beautifulsoup4==4.11.1",
"psycopg2-binary==2.9.3", "psycopg2-binary==2.9.3"
"requests~=2.30.0"
] ]

View File

@ -12,7 +12,6 @@ pyparsing==3.0.9
pytest==7.1.3 pytest==7.1.3
pytest-cov==4.0.0 pytest-cov==4.0.0
python-dotenv==0.20.0 python-dotenv==0.20.0
requests~=2.30.0
soupsieve==2.3.2.post1 soupsieve==2.3.2.post1
SQLAlchemy==1.4.39 SQLAlchemy==1.4.39
tomli==2.0.1 tomli==2.0.1

View File

@ -4,7 +4,7 @@ import re
from sqlalchemy import select, desc, exists, not_, except_ from sqlalchemy import select, desc, exists, not_, except_
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
import bs4 import bs4
import requests as req from urllib.request import urlopen
from urllib.parse import urljoin from urllib.parse import urljoin
import logging import logging
from argparse import ArgumentParser from argparse import ArgumentParser
@ -73,22 +73,23 @@ def reparse_ingredients(session):
def load_page(recipe_url): def load_recipe(recipe_url):
try: try:
logging.info(f'Loading Page: {recipe_url}') logging.info(f'Loading Recipe: {recipe_url}')
with req.get(recipe_url) as f: with urlopen(recipe_url) as f:
if f.status_code == 404: if f.getcode() == 404:
raise Exception(f"Page does not exist (404): {recipe_url}") raise Exception(f"Recipe Does not exist: {recipe_url}")
return bs4.BeautifulSoup(f.read().decode(), 'html.parser') return bs4.BeautifulSoup(f.read().decode(), 'html.parser')
except Exception as e: except Exception as e:
logging.warning(f"Could not download or parse recipe: {recipe_url}") logging.warning(f"Could not download or parse recipe: {recipe_url}")
logging.warning(e) logging.warning(e)
return None
def parse_recipe(session, recipe, site): def parse_recipe(session, recipe, site):
recipe_url = urljoin(site.base_url, str(recipe.identifier)) recipe_url = urljoin(site.base_url, str(recipe.identifier))
recipe_page = load_page(recipe_url) recipe_page = load_recipe(recipe_url)
if not recipe_page: if not recipe_page:
return None return None
@ -125,7 +126,7 @@ def parse_recipe(session, recipe, site):
return recipe return recipe
def main(): # pragma: no cover def main():
parser = ArgumentParser(description="Scrape a recipe site for recipies") parser = ArgumentParser(description="Scrape a recipe site for recipies")
parser.add_argument('site', parser.add_argument('site',
help='Name of site') help='Name of site')

View File

@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
import pytest import pytest
def test_load_page(): def test_load_recipe():
page = scrape.load_recipe("https://hs.andreistoica.ca:4943") page = scrape.load_recipe("https://hs.andreistoica.ca:4943")
assert type(page) == BeautifulSoup assert type(page) == BeautifulSoup