diff --git a/Posting.cs b/Posting.cs
index f1a06a0..3c1977c 100644
--- a/Posting.cs
+++ b/Posting.cs
@@ -1,10 +1,5 @@
-using System.Net.Http;
-using System.Net.Http.Headers;
-using System.Threading.Tasks;
-using System.Net;
-using System.Text;
-using System.IO;
 using System;
+using System.Text.RegularExpressions;
 using HtmlAgilityPack;
 
 
@@ -35,27 +30,104 @@ public class Posting
     public Address LocationAddress { get; set; }
     public Address ContactAddress { get; set; }
     public string NotesOnApplying { get; set; }
+    public string BoxNumber { get; set; }
     public string VacancyID { get; set; }
     public DateTime DatePosted { get; set; }
     public DateTime DateDue { get; set; }
 
+    // Finds two-digit date fragments such as 01/31/24; built once and shared by all instances.
+    private static readonly Regex ShortDatePattern = new Regex(@"\d\d/\d\d/\d\d");
+
+    /// <summary>
+    /// Downloads the printable vacancy page for <paramref name="id"/> and fills this posting
+    /// from its header paragraph and its "rightCol" spans.
+    /// </summary>
+    /// <param name="id">Vacancy identifier sent as the page's <c>id</c> query parameter.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="id"/> is null.</exception>
+    /// <exception cref="InvalidOperationException">
+    /// The page has no header paragraph, fewer than two dates in it, or no "rightCol" spans.
+    /// </exception>
     public Posting(string id)
     {
-        string fullUrl = $"https://statejobs.ny.gov/employees/vacancyDetailsPrint.cfm?id={id}";
-        using (var client = new HttpClient())
-        {
-            var response = client.GetAsync(fullUrl).Result;
+        if (id == null)
+        {
+            throw new ArgumentNullException(nameof(id));
+        }
+
+        // Escape the id so characters such as '&' or '#' cannot alter the query string.
+        string fullUrl = "https://statejobs.ny.gov/employees/vacancyDetailsPrint.cfm?id=" + Uri.EscapeDataString(id);
+        Console.WriteLine(fullUrl);
+        HtmlWeb web = new HtmlWeb();
+        var htmlDoc = web.Load(fullUrl);
+        this.LocationAddress = new Address();
+        this.ContactAddress = new Address();
 
-            if (response.IsSuccessStatusCode)
+        // SelectSingleNode yields null when nothing matches; report that instead of dereferencing it.
+        var node = htmlDoc.DocumentNode.SelectSingleNode("//*[@id=\"noNavContent\"]/p");
+        if (node == null)
+        {
+            throw new InvalidOperationException($"No header paragraph found at {fullUrl}.");
+        }
+
+        MatchCollection matches = ShortDatePattern.Matches(node.InnerHtml);
+        if (matches.Count < 2)
+        {
+            throw new InvalidOperationException($"Expected two dates in the header paragraph at {fullUrl}, found {matches.Count}.");
+        }
+
+        // The first match is stored as the posted date and the second as the due date.
+        // Parse with the invariant culture so the result does not depend on the machine's locale
+        // (assumes the site prints month/day/year - TODO confirm).
+        var invariant = System.Globalization.CultureInfo.InvariantCulture;
+        this.DatePosted = DateTime.Parse(matches[0].Value, invariant);
+        this.DateDue = DateTime.Parse(matches[1].Value, invariant);
+        Console.WriteLine($"Due: {this.DateDue}, posted: {this.DatePosted}");
+
+        // SelectNodes yields null rather than an empty collection when nothing matches.
+        var fields = htmlDoc.DocumentNode.SelectNodes("//span[@class=\"rightCol\"]");
+        if (fields == null)
+        {
+            throw new InvalidOperationException($"No detail fields found at {fullUrl}.");
+        }
+
+        // Fields are identified purely by their position among the "rightCol" spans.
+        int i = 0;
+        foreach (HtmlNode link in fields)
+        {
+            string value = link.InnerHtml.Trim();
+            switch (i)
             {
-                var responseContent = response.Content;
+                case 0: this.Agency = value; break;
+                case 1: this.Title = value; break;
+                case 2: this.OccupationalCategory = value; break;
+                case 3: this.SalaryGrade = value; break;
+                case 4: this.BargainingUnit = value; break;
+                case 5: this.SalaryRange = value; break;
+                case 6: this.EmploymentType = value; break;
+                case 7: this.AppointmentType = value; break;
+                case 8: this.JurisdictionalClass = value; break;
+                case 9: this.TravelPercentage = value; break;
+                case 19: this.LocationAddress.Street = value; break;
+                case 21: this.LocationAddress.City = value; break;
+                case 22: this.LocationAddress.State = value; break;
+                case 23: this.LocationAddress.ZipCode = value; break;
+                case 24: this.MinimumQualifications = value; break;
+                case 25: this.DutiesDescription = value; break;
+                case 27: this.ContactName = value; break;
+                case 30: this.ContactEmailAddress = value; break;
+                case 31: this.ContactAddress.Street = value; break;
+                case 33: this.ContactAddress.City = value; break;
+                case 34: this.ContactAddress.State = value; break;
+                case 35: this.ContactAddress.ZipCode = value; break;
+                case 36: this.NotesOnApplying = value; break;
 
-                // by calling .Result you are synchronously reading the result
-                string responseString = responseContent.ReadAsStringAsync().Result;
-                HtmlDocument htmlDoc = new HtmlDocument();
-                htmlDoc.LoadHtml(responseString);
+                default:
+                    break;
             }
+            i++;
         }
+        // Decode the HTML-escaped ampersand; ?. keeps this safe when the bargaining-unit span was never reached.
+        this.BargainingUnit = this.BargainingUnit?.Replace("&amp;", "&");
         this.VacancyID = id;
     }
-}
\ No newline at end of file
+}