// Posting.cs as introduced by commit ffcb6a12df71f7f05f013c62c30c9366b15531ec
// ("Start of webscraping", Gregory Rudolph, 2021-08-01).
// The same commit also adds one line to StateJobsNYSubmit.csproj; that line's
// content is not reproduced here.
using System.Net.Http;
using System.Net.Http.Headers;
using System.Threading.Tasks;
using System.Net;
using System.Text;
using System.IO;
using System;
using HtmlAgilityPack;

/// <summary>
/// Plain data holder for the four components of a postal address.
/// </summary>
public class Address
{
    public string Street { get; set; }
    public string City { get; set; }
    public string State { get; set; }
    public string ZipCode { get; set; }
}

/// <summary>
/// A job posting identified by a vacancy id on statejobs.ny.gov.
/// </summary>
/// <remarks>
/// The constructor downloads and parses the vacancy details page, but does not
/// yet assign any of the properties below; they keep their default values
/// until scraping logic is added.
/// </remarks>
public class Posting
{
    // Base of the vacancy details page; the (escaped) vacancy id is appended.
    private const string VacancyDetailsUrl =
        "https://statejobs.ny.gov/employees/vacancyDetailsView.cfm?id=";

    // One client shared by every Posting: creating and disposing an HttpClient
    // per request can exhaust sockets under load.
    private static readonly HttpClient s_httpClient = new HttpClient();

    public string Agency { get; set; }
    public string Title { get; set; }
    public string OccupationalCategory { get; set; }
    public string SalaryGrade { get; set; }
    public string BargainingUnit { get; set; }
    public string SalaryRange { get; set; }
    public string EmploymentType { get; set; }
    public string AppointmentType { get; set; }
    public string JurisdictionalClass { get; set; }
    public string TravelPercentage { get; set; }
    public string MinimumQualifications { get; set; }
    public string DutiesDescription { get; set; }
    public string ContactName { get; set; }
    public string ContactEmailAddress { get; set; }
    public Address LocationAddress { get; set; }
    public Address ContactAddress { get; set; }
    public string NotesOnApplying { get; set; }
    public string VacancyID { get; set; }
    public DateTime DatePosted { get; set; }
    public DateTime DateDue { get; set; }

    /// <summary>
    /// Fetches the vacancy details page for <paramref name="id"/> and, when the
    /// response has a success status code, parses its HTML.
    /// </summary>
    /// <param name="id">Vacancy id placed in the <c>id</c> query parameter.</param>
    /// <exception cref="ArgumentNullException"><paramref name="id"/> is null.</exception>
    /// <exception cref="AggregateException">
    /// The request or the body read failed; the underlying error is the inner exception.
    /// </exception>
    public Posting(string id)
    {
        if (id == null)
        {
            throw new ArgumentNullException(nameof(id));
        }

        // Escape the id so characters such as '&' or '#' cannot alter the query string.
        string fullUrl = VacancyDetailsUrl + Uri.EscapeDataString(id);

        // A constructor cannot await, so the calls below block on .Result. This is
        // kept as-is so failures still surface as AggregateException to callers.
        using (HttpResponseMessage response = s_httpClient.GetAsync(fullUrl).Result)
        {
            if (!response.IsSuccessStatusCode)
            {
                // Best-effort, as before: a non-success response leaves the
                // posting with default property values and raises no error.
                return;
            }

            string responseString = response.Content.ReadAsStringAsync().Result;

            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(responseString);
            // TODO: read the posting fields out of htmlDoc; nothing consumes it yet.
        }
    }
}