From 14267a9a897e9e24abbf1449774f785d8e1a5e52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Pich?= Date: Sun, 7 May 2023 21:27:32 +0200 Subject: [PATCH] Fix previous workaround by handling fake http 404 errors --- sdk-hebe/api/sdk-hebe.api | 4 +++ sdk-scrapper/api/sdk-scrapper.api | 1 + .../sdk/scrapper/exception/VulcanException.kt | 2 +- .../scrapper/interceptor/ErrorInterceptor.kt | 30 +++++++++++-------- .../scrapper/repository/RegisterRepository.kt | 5 ++-- .../scrapper/repository/StudentRepository.kt | 1 - .../sdk/scrapper/service/ServiceManager.kt | 1 - 7 files changed, 26 insertions(+), 18 deletions(-) diff --git a/sdk-hebe/api/sdk-hebe.api b/sdk-hebe/api/sdk-hebe.api index 5b1eecdd..a137d5a0 100644 --- a/sdk-hebe/api/sdk-hebe.api +++ b/sdk-hebe/api/sdk-hebe.api @@ -29,6 +29,10 @@ public final class io/github/wulkanowy/sdk/hebe/exception/InvalidPinException : public fun <init> ()V } +public final class io/github/wulkanowy/sdk/hebe/exception/InvalidSymbolException : java/io/IOException { + public fun <init> ()V +} + public final class io/github/wulkanowy/sdk/hebe/exception/InvalidTokenException : java/io/IOException { public fun <init> (Ljava/lang/String;)V } diff --git a/sdk-scrapper/api/sdk-scrapper.api b/sdk-scrapper/api/sdk-scrapper.api index 949acd99..5e33f0b1 100644 --- a/sdk-scrapper/api/sdk-scrapper.api +++ b/sdk-scrapper/api/sdk-scrapper.api @@ -389,6 +389,7 @@ public class io/github/wulkanowy/sdk/scrapper/exception/PasswordResetErrorExcept } public class io/github/wulkanowy/sdk/scrapper/exception/ScrapperException : java/io/IOException { + public final fun getCode ()I } public final class io/github/wulkanowy/sdk/scrapper/exception/ServiceUnavailableException : io/github/wulkanowy/sdk/scrapper/exception/VulcanException { diff --git a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/exception/VulcanException.kt b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/exception/VulcanException.kt index eed76971..63e94fcb 100644 --- 
a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/exception/VulcanException.kt +++ b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/exception/VulcanException.kt @@ -1,3 +1,3 @@ package io.github.wulkanowy.sdk.scrapper.exception -open class VulcanException internal constructor(message: String) : ScrapperException(message) +open class VulcanException internal constructor(message: String, httpCode: Int = -1) : ScrapperException(message, httpCode) diff --git a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/interceptor/ErrorInterceptor.kt b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/interceptor/ErrorInterceptor.kt index 2f7d547e..50916cd0 100644 --- a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/interceptor/ErrorInterceptor.kt +++ b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/interceptor/ErrorInterceptor.kt @@ -16,6 +16,7 @@ import org.jsoup.Jsoup import org.jsoup.nodes.Document import org.slf4j.LoggerFactory import java.net.CookieManager +import java.net.HttpURLConnection.HTTP_NOT_FOUND internal class ErrorInterceptor( private val cookies: CookieManager, @@ -31,16 +32,23 @@ internal class ErrorInterceptor( if (response.body?.contentType()?.subtype != "json") { val url = response.request.url.toString() - checkForError(Jsoup.parse(response.peekBody(Long.MAX_VALUE).byteStream(), null, url), url) + checkForError( + doc = Jsoup.parse(response.peekBody(Long.MAX_VALUE).byteStream(), null, url), + redirectUrl = url, + httpCode = response.code, + ) } return response } - private fun checkForError(doc: Document, redirectUrl: String) { + private fun checkForError(doc: Document, redirectUrl: String, httpCode: Int) { doc.select(".errorBlock").let { if (it.isNotEmpty()) { - throw VulcanException("${it.select(".errorTitle").text()}. 
${it.select(".errorMessage").text()}") + when (val title = it.select(".errorTitle").text()) { + "HTTP Error 404" -> throw ScrapperException(title, HTTP_NOT_FOUND) + else -> throw VulcanException("$title. ${it.select(".errorMessage").text()}", httpCode) + } } } @@ -55,7 +63,7 @@ internal class ErrorInterceptor( doc.select("#MainPage_ErrorDiv div").let { if (it.text().contains("Trwa aktualizacja bazy danych")) throw ServiceUnavailableException(it.last()?.ownText().orEmpty()) if (it.last()?.ownText()?.contains("czasowo wyłączona") == true) throw TemporarilyDisabledException(it.last()?.ownText().orEmpty()) - if (it.isNotEmpty()) throw VulcanException(it[0].ownText()) + if (it.isNotEmpty()) throw VulcanException(it[0].ownText(), httpCode) } doc.select("h2.error").let { @@ -76,26 +84,24 @@ internal class ErrorInterceptor( } when (doc.title()) { - "Błąd" -> throw VulcanException(doc.body().text()) - "Błąd strony" -> throw VulcanException(doc.select(".errorMessage").text()) + "Błąd" -> throw VulcanException(doc.body().text(), httpCode) + "Błąd strony" -> throw VulcanException(doc.select(".errorMessage").text(), httpCode) "Logowanie" -> throw AccountPermissionException(doc.select("div").last()?.ownText().orEmpty().split(" Jeśli")[0]) "Login Service" -> { cookies.cookieStore.removeAll() // workaround for very strange (random) errors - throw ScrapperException(doc.select("#MainDiv > div").text()) + throw ScrapperException(doc.select("#MainDiv > div").text(), httpCode) } "Połączenie zablokowane" -> throw ConnectionBlockedException(doc.body().text()) "Just a moment..." 
-> if (doc.select(".footer").text().contains("Cloudflare")) { throw ConnectionBlockedException(doc.select("#challenge-body-text").text()) } "Przerwa techniczna" -> throw ServiceUnavailableException(doc.title()) - "Strona nie została odnaleziona" -> throw ScrapperException(doc.title()) - "Strona nie znaleziona" -> throw ScrapperException(doc.selectFirst("div div")?.text().orEmpty()) + "Strona nie została odnaleziona" -> throw ScrapperException(doc.title(), httpCode) + "Strona nie znaleziona" -> throw ScrapperException(doc.selectFirst("div div")?.text().orEmpty(), httpCode) } - doc.select("h2").text().let { - if (it == "Strona nie znaleziona") throw ScrapperException(it) + if (it == "Strona nie znaleziona") throw ScrapperException(it, httpCode) } - if (isBobCmn(doc, redirectUrl)) { throw ConnectionBlockedException("Połączenie zablokowane przez system antybotowy. Spróbuj ponownie za chwilę") } diff --git a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/repository/RegisterRepository.kt b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/repository/RegisterRepository.kt index d3f9eae6..2a8eb005 100644 --- a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/repository/RegisterRepository.kt +++ b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/repository/RegisterRepository.kt @@ -35,7 +35,6 @@ import org.jsoup.Jsoup import org.jsoup.parser.Parser import org.jsoup.select.Elements import org.slf4j.LoggerFactory -import retrofit2.HttpException import java.net.HttpURLConnection import java.nio.charset.StandardCharsets @@ -252,10 +251,10 @@ internal class RegisterRepository( private suspend fun getStudentCache(): CacheResponse? 
{ val startPage = runCatching { - student.getStart("App") + student.getStart(url.generate(UrlGenerator.Site.STUDENT) + "App") }.recoverCatching { if (it is ScrapperException && it.code == HttpURLConnection.HTTP_NOT_FOUND) { - student.getStart("Start") + student.getStart(url.generate(UrlGenerator.Site.STUDENT) + "Start") } else throw it }.getOrThrow() diff --git a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/repository/StudentRepository.kt b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/repository/StudentRepository.kt index ea258ba6..3f264708 100644 --- a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/repository/StudentRepository.kt +++ b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/repository/StudentRepository.kt @@ -56,7 +56,6 @@ import io.github.wulkanowy.sdk.scrapper.timetable.mapTimetableHeaders import io.github.wulkanowy.sdk.scrapper.timetable.mapTimetableList import io.github.wulkanowy.sdk.scrapper.toFormat import org.jsoup.Jsoup -import retrofit2.HttpException import java.net.HttpURLConnection.HTTP_NOT_FOUND import java.time.LocalDate diff --git a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/service/ServiceManager.kt b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/service/ServiceManager.kt index d8a00a7a..ca2f907e 100644 --- a/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/service/ServiceManager.kt +++ b/sdk-scrapper/src/main/kotlin/io/github/wulkanowy/sdk/scrapper/service/ServiceManager.kt @@ -176,7 +176,6 @@ internal class ServiceManager( return getRetrofit(getClientBuilder(), urlGenerator.generate(UrlGenerator.Site.HOME), json = true).create() } - @OptIn(ExperimentalSerializationApi::class) private fun getRetrofit(client: OkHttpClient.Builder, baseUrl: String, json: Boolean = false) = Retrofit.Builder() .baseUrl(baseUrl) .client(client.build())