diff --git a/.env b/.env index e69de29..b530907 100644 --- a/.env +++ b/.env @@ -0,0 +1,2 @@ +APP_ENV=dev +MESSENGER_TRANSPORT_DSN=doctrine://default diff --git a/.env.example b/.env.example index 81e3a74..1f545e4 100644 --- a/.env.example +++ b/.env.example @@ -11,9 +11,6 @@ # DO NOT DEFINE PRODUCTION SECRETS IN THIS FILE NOR IN ANY OTHER COMMITTED FILES. # https://symfony.com/doc/current/configuration/secrets.html -APP_ENV=dev -MESSENGER_TRANSPORT_DSN=doctrine://default - # Generate your own value with php -r "echo bin2hex(random_bytes(16)) . PHP_EOL;" APP_SECRET=TODO diff --git a/composer.json b/composer.json index 0610866..4f6ed0e 100644 --- a/composer.json +++ b/composer.json @@ -22,7 +22,9 @@ "simpod/doctrine-utcdatetime": "^0.2.0", "symfony/apache-pack": "^1.0", "symfony/console": "6.2.*", + "symfony/css-selector": "6.2.*", "symfony/doctrine-messenger": "6.2.*", + "symfony/dom-crawler": "6.2.*", "symfony/dotenv": "6.2.*", "symfony/flex": "^2", "symfony/framework-bundle": "6.2.*", diff --git a/composer.lock b/composer.lock index 7f427b2..d62a063 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "7a39e53db29b1bb2087ed0a9f6c52e51", + "content-hash": "ec60a70816b9a72f22376bc30fd698a2", "packages": [ { "name": "doctrine/cache", @@ -1620,6 +1620,75 @@ ], "time": "2022-12-08T02:08:23+00:00" }, + { + "name": "masterminds/html5", + "version": "2.7.6", + "source": { + "type": "git", + "url": "https://github.com/Masterminds/html5-php.git", + "reference": "897eb517a343a2281f11bc5556d6548db7d93947" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Masterminds/html5-php/zipball/897eb517a343a2281f11bc5556d6548db7d93947", + "reference": "897eb517a343a2281f11bc5556d6548db7d93947", + "shasum": "" + }, + "require": { + "ext-ctype": "*", + "ext-dom": "*", + "ext-libxml": "*", + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35 || ^5.7.21 || ^6 || ^7" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.7-dev" + } + }, + "autoload": { + "psr-4": { + "Masterminds\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Matt Butcher", + "email": "technosophos@gmail.com" + }, + { + "name": "Matt Farina", + "email": "matt@mattfarina.com" + }, + { + "name": "Asmir Mustafic", + "email": "goetas@gmail.com" + } + ], + "description": "An HTML5 parser and serializer.", + "homepage": "http://masterminds.github.io/html5-php", + "keywords": [ + "HTML5", + "dom", + "html", + "parser", + "querypath", + "serializer", + "xml" + ], + "support": { + "issues": "https://github.com/Masterminds/html5-php/issues", + "source": "https://github.com/Masterminds/html5-php/tree/2.7.6" + }, + "time": "2022-08-18T16:18:26+00:00" + }, { "name": "meteo-concept/hcaptcha-bundle", "version": "v3.3.0", @@ -2811,6 +2880,71 @@ ], "time": "2022-12-28T14:26:22+00:00" }, + { + "name": "symfony/css-selector", + "version": "v6.2.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/css-selector.git", + "reference": "bf1b9d4ad8b1cf0dbde8b08e0135a2f6259b9ba1" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/css-selector/zipball/bf1b9d4ad8b1cf0dbde8b08e0135a2f6259b9ba1", + "reference": "bf1b9d4ad8b1cf0dbde8b08e0135a2f6259b9ba1", + "shasum": "" + }, + "require": { + "php": ">=8.1" + }, + "type": "library", + "autoload": { + "psr-4": { + "Symfony\\Component\\CssSelector\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Jean-François Simon", + "email": "jeanfrancois.simon@sensiolabs.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Converts CSS selectors to XPath expressions", + "homepage": "https://symfony.com", + "support": { + "source": "https://github.com/symfony/css-selector/tree/v6.2.5" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2023-01-01T08:38:09+00:00" + }, { "name": "symfony/dependency-injection", "version": "v6.2.3", @@ -3152,6 +3286,76 @@ ], "time": "2022-11-04T07:42:34+00:00" }, + { + "name": "symfony/dom-crawler", + "version": "v6.2.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/dom-crawler.git", + "reference": "19aa4962a0687e96941f0bdb27b794c5b73e2394" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/dom-crawler/zipball/19aa4962a0687e96941f0bdb27b794c5b73e2394", + "reference": "19aa4962a0687e96941f0bdb27b794c5b73e2394", + "shasum": "" + }, + "require": { + "masterminds/html5": "^2.6", + "php": ">=8.1", + "symfony/polyfill-ctype": "~1.8", + "symfony/polyfill-mbstring": "~1.0" + }, + "require-dev": { + "symfony/css-selector": "^5.4|^6.0" + }, + "suggest": { + "symfony/css-selector": "" + }, + "type": "library", + "autoload": { + "psr-4": { + "Symfony\\Component\\DomCrawler\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Eases DOM navigation for HTML and XML documents", + "homepage": "https://symfony.com", + "support": { + "source": "https://github.com/symfony/dom-crawler/tree/v6.2.5" + }, + "funding": [ + { + "url": "https://symfony.com/sponsor", + "type": "custom" + }, + { + "url": "https://github.com/fabpot", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", + "type": "tidelift" + } + ], + "time": "2023-01-20T17:45:48+00:00" + }, { "name": "symfony/dotenv", "version": "v6.2.0", diff --git a/src/Command/ClinicsImportCommand.php b/src/Command/ClinicsImportCommand.php index 898f331..f37749c 100644 --- a/src/Command/ClinicsImportCommand.php +++ b/src/Command/ClinicsImportCommand.php @@ -5,8 +5,10 @@ namespace App\Command; use App\DataSource\DataSourceException; use App\DataSource\DataSourceInterface; use App\DataSource\ErinReedDataSource; +use App\DataSource\TransInTheSouthDataSource; use App\Entity\Clinic; use App\Entity\ImportHash; +use App\HereMaps\Client as HereClient; use App\Repository\ClinicRepository; use App\Repository\ImportHashRepository; use Symfony\Component\Console\Attribute\AsCommand; @@ -23,19 +25,23 @@ use Symfony\Contracts\HttpClient\HttpClientInterface; class ClinicsImportCommand extends Command { private HttpClientInterface $httpClient; + private HereClient $hereClient; private ClinicRepository $clinics; private ImportHashRepository $imports; private array $dataSources = [ - ErinReedDataSource::class + ErinReedDataSource::class, + TransInTheSouthDataSource::class, ]; public function __construct( HttpClientInterface $httpClient, + HereClient $hereClient, ClinicRepository $clinics, ImportHashRepository $imports, ) { $this->httpClient = $httpClient; + $this->hereClient = $hereClient; $this->clinics = $clinics; $this->imports = $imports; parent::__construct(); @@ -49,7 +55,7 @@ class ClinicsImportCommand extends Command $clinicsAddedCount = 0; foreach ($this->dataSources as $source) { /* @var DataSourceInterface $source */ - $source = new $source($this->httpClient); + $source = new $source($this->httpClient, $this->hereClient); $io->section($source->getType()); $io->text('Fetching clinics'); @@ -72,6 +78,7 @@ class ClinicsImportCommand extends Command continue; } + $source->preImport($new); $this->clinics->save($new); $import = new ImportHash(); diff --git a/src/Command/ResetCommand.php b/src/Command/ResetCommand.php index e16c12a..43f6b52 100644 --- a/src/Command/ResetCommand.php +++ b/src/Command/ResetCommand.php @@ -14,7 +14,7 @@ use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Style\SymfonyStyle; #[AsCommand( - name: 'app:reset', + name: 'app:clinics:reset', description: 'Dev tool used to reset the application database', )] class ResetCommand extends Command diff --git a/src/Controller/Admin/ClinicCrudController.php b/src/Controller/Admin/ClinicCrudController.php index e99be92..493bb40 100644 --- a/src/Controller/Admin/ClinicCrudController.php +++ b/src/Controller/Admin/ClinicCrudController.php @@ -21,6 +21,7 @@ use EasyCorp\Bundle\EasyAdminBundle\Filter\ChoiceFilter; use EasyCorp\Bundle\EasyAdminBundle\Router\AdminUrlGenerator; use Oefenweb\DamerauLevenshtein\DamerauLevenshtein as Levenshtein; use Symfony\Component\HttpFoundation\RedirectResponse; +use Symfony\Component\Validator\Constraints\Choice; class ClinicCrudController extends AbstractCrudController { @@ -76,7 +77,12 @@ class ClinicCrudController extends AbstractCrudController ->add(ChoiceFilter::new('published')->setChoices([ 'No' => false, 'Yes' => true - ])); + ])) + ->add(ChoiceFilter::new('dataSource')->canSelectMultiple()->setChoices([ + 'Trans in the South' => 'transInTheSouth', + 'Erin Reed' => 'erinReed', + ])) + ; } public function configureFields(string $pageName): iterable diff --git a/src/DataSource/DataSourceInterface.php b/src/DataSource/DataSourceInterface.php index 4a28bad..c56c900 100644 --- a/src/DataSource/DataSourceInterface.php +++ b/src/DataSource/DataSourceInterface.php @@ -3,16 +3,19 @@ namespace App\DataSource; use App\Entity\Clinic; +use App\HereMaps\Client; use Symfony\Contracts\HttpClient\HttpClientInterface; interface DataSourceInterface { public const DATASOURCE__ERIN_REED = 'erinReed'; + public const DATASOURCE__TRANS_IN_THE_SOUTH = 'transInTheSouth'; public const DATASOURCE__MANUAL_ENTRY = 'manualEntry'; - public function __construct(HttpClientInterface $httpClient); + public function __construct(HttpClientInterface $httpClient, Client $hereClient); public function getType(): string; /* @throws DataSourceException */ public function fetchClinics(): array; public function hash(Clinic $clinic): string; + public function preImport(Clinic $clinic): void; } \ No newline at end of file diff --git a/src/DataSource/ErinReedDataSource.php b/src/DataSource/ErinReedDataSource.php index 7d50458..0668d39 100644 --- a/src/DataSource/ErinReedDataSource.php +++ b/src/DataSource/ErinReedDataSource.php @@ -3,6 +3,7 @@ namespace App\DataSource; use App\Entity\Clinic; +use App\HereMaps\Client as HereClient; use Symfony\Contracts\HttpClient\HttpClientInterface; class ErinReedDataSource implements DataSourceInterface @@ -11,8 +12,10 @@ class ErinReedDataSource implements DataSourceInterface private HttpClientInterface $httpClient; - public function __construct(HttpClientInterface $httpClient) - { + public function __construct( + HttpClientInterface $httpClient, + HereClient $hereClient, + ) { $this->httpClient = $httpClient; } @@ -103,4 +106,9 @@ class ErinReedDataSource implements DataSourceInterface $data = implode( '.', $pieces ); return md5( $data ); } + + public function preImport(Clinic $clinic): void + { + // left intentionally blank + } } \ No newline at end of file diff --git a/src/DataSource/TransInTheSouthDataSource.php b/src/DataSource/TransInTheSouthDataSource.php new file mode 100644 index 0000000..93a038a --- /dev/null +++ b/src/DataSource/TransInTheSouthDataSource.php @@ -0,0 +1,159 @@ +httpClient = $httpClient; + $this->hereClient = $hereClient; + } + + public function getType(): string + { + return self::DATASOURCE__TRANS_IN_THE_SOUTH; + } + + /** + * @throws DataSourceException + */ + public function fetchClinics(): array + { + $tisProvidersId = $this->_scrapeTisProvidersId(); + $html = $this->_loadSearchResults($tisProvidersId); + $rawRecords = $this->_scrapeData($html); + + $clinics = []; + foreach ($rawRecords as $record) { + $clinic = new Clinic(); + $clinic->setName($record['name']); + $clinic->setDescription($record['description']); + $clinic->setAddress($record['address']); + $clinic->setDataSource($this->getType()); + $clinic->setPublished(false); + $clinics[] = $clinic; + } + + return $clinics; + } + + private function _scrapeTisProvidersId(): string + { + try { + $res = $this->httpClient->request('GET', 'https://southernequality.org/resources/transinthesouth/'); + $html = $res->getContent(); + } catch (\Throwable) { + throw new DataSourceException('HTTP request to fetch search form failed.', $this->getType()); + } + + if ($html === '') { + throw new DataSourceException('Missing web page content in response.', $this->getType()); + } + + $crawler = new Crawler($html); + return $crawler->filter('#filter-tis-providers')->first()->attr('value'); + } + + /** + * @throws DataSourceException + */ + private function _loadSearchResults(string $tisProvidersId): string + { + try { + $res = $this->httpClient->request('POST', 'https://southernequality.org/resources/transinthesouth/', [ + 'body' => 'tis-name-search=&states=&services%5B%5D=Informed+Consent&services%5B%5D=Offers+Hormone+Replacement+Therapy+%28HRT%29&filter-tis-providers=' . $tisProvidersId . '&_wp_http_referer=%2Fresources%2Ftransinthesouth%2F&filter_providers=Search', + ]); + $html = $res->getContent(); + } catch (\Throwable) { + throw new DataSourceException('HTTP request to fetch search results failed.', $this->getType()); + } + + if ($html === '') { + throw new DataSourceException('Missing web page content in response.', $this->getType()); + } + + return $html; + } + + /** + * @throws DataSourceException + */ + private function _scrapeData(string $html): array + { + $rawRecords = []; + + $crawler = new Crawler($html); + $crawler->filter('.provider')->each(function(Crawler $crawler) use (&$rawRecords) { + $services = []; + foreach ($crawler->filter('.accordion-header') as $service) { + $services[] = $service->textContent; + } + + if (!in_array('Offers Hormone Replacement Therapy (HRT)', $services) || !in_array('Informed Consent', $services)) { + return; + } + + $name = $crawler->filter('.provider--title')->text(); + if (!$name) { + throw new DataSourceException('Missing clinic attribute in scraped data: name', $this->getType()); + } + + $practice = $crawler->filter('.provider--practice-name')->text(); + if ($practice && ($name !== $practice)) { + $name .= ' - ' . $practice; + } + + $description = $crawler->filter('.provider--summary')->text(); + + $address = $crawler->filter('.provider--address')->text(); + if (!$address) { + throw new DataSourceException('Missing clinic attribute in scraped data: address', $this->getType()); + } + + $rawRecords[] = [ + 'name' => $name, + 'description' => $description, + 'address' => $address, + ]; + }); + + return $rawRecords; + } + + public function hash(Clinic $clinic): string + { + $pieces = [ + $clinic->getName(), + $clinic->getDescription(), + $clinic->getAddress(), + ]; + $data = implode('.', $pieces); + return md5($data); + } + + /** + * @throws \Exception + * @throws DataSourceException + */ + public function preImport(Clinic $clinic): void + { + $items = $this->hereClient->geocode($clinic->getAddress())['items']; + if (count($items) === 0) { + throw new DataSourceException('No coordinates found for address: ' . $clinic->getAddress(), $this->getType()); + } + + $clinic->setLatitude($items[0]['position']['lat']); + $clinic->setLongitude($items[0]['position']['lng']); + } +} diff --git a/src/HereMaps/Client.php b/src/HereMaps/Client.php index ca578eb..366885c 100644 --- a/src/HereMaps/Client.php +++ b/src/HereMaps/Client.php @@ -46,4 +46,30 @@ class Client { return $decoded; } + + /** + * @throws \Exception + */ + public function geocode(string $address): array + { + $url = 'https://geocode.search.hereapi.com/v1/geocode?' . http_build_query([ + 'lang' => 'en-US', + 'q' => $address, + 'apiKey' => $this->hereApiKey, + ]); + + try { + $res = $this->httpClient->request('GET', $url); + $data = $res->getContent(); + } catch (\Throwable $e) { + throw new \Exception('HTTP request to Here Maps failed: ' . $e->getMessage()); + } + + $decoded = json_decode($data, true); + if ($decoded === false) { + throw new \Exception('Failed to decode Here Maps response'); + } + + return $decoded; + } } \ No newline at end of file diff --git a/templates/search-form.html.twig b/templates/search-form.html.twig index 4263e8d..f35d411 100644 --- a/templates/search-form.html.twig +++ b/templates/search-form.html.twig @@ -24,6 +24,7 @@ Parameters:
{{ form_widget(searchForm.submit, {'attr': {'class': 'rounded-pill btn-primary'}}) }}
+ {{ form_widget(searchForm.page, {'attr': {'value': '1'}}) }} {{ form_end(searchForm) }} diff --git a/templates/search.html.twig b/templates/search.html.twig index 3f5de80..bb55e1b 100644 --- a/templates/search.html.twig +++ b/templates/search.html.twig @@ -49,12 +49,20 @@ {% if clinic.address %}

{{ clinic.address }}

{% endif %} - {% if clinic.dataSource == "erinReed" %} - - - Informed Consent + {% if clinic.dataSource != 'manualEntry' %} + + + {% if clinic.dataSource == "erinReed" %} + Erin Reed + {% elseif clinic.dataSource == 'transInTheSouth' %} + Trans in the South + {% endif %} {% endif %} + + + Informed Consent + {% if clinic.description %}

{{ clinic.description|raw }}