using System;
using System.Globalization;
using System.Text.RegularExpressions;
using HtmlAgilityPack;

/// <summary>
/// A postal address broken into street, city, state and ZIP code.
/// </summary>
public class Address
{
    public string Street { get; set; }
    public string City { get; set; }
    public string State { get; set; }
    public string ZipCode { get; set; }
}

/// <summary>
/// A single vacancy scraped from the printable vacancy-details page on
/// statejobs.ny.gov. All text properties hold the trimmed inner HTML of the
/// page's <c>span.rightCol</c> elements, selected by their position on the page.
/// </summary>
public class Posting
{
    // Printable details page; the vacancy id is appended to this prefix.
    private const string DetailsUrlPrefix = "https://statejobs.ny.gov/employees/vacancyDetailsPrint.cfm?id=";

    // Matches short numeric dates such as 01/31/24 in the page's first paragraph.
    private static readonly Regex ShortDateRegex = new Regex(@"\d\d/\d\d/\d\d");

    // Matches a "Box <digits>" reference inside the application notes.
    private static readonly Regex BoxRegex = new Regex(@"(Box \d+)");

    public string Agency { get; set; }
    public string Title { get; set; }
    public string OccupationalCategory { get; set; }
    public string SalaryGrade { get; set; }
    public string BargainingUnit { get; set; }
    public string SalaryRange { get; set; }
    public string EmploymentType { get; set; }
    public string AppointmentType { get; set; }
    public string JurisdictionalClass { get; set; }
    public string TravelPercentage { get; set; }
    public string MinimumQualifications { get; set; }
    public string DutiesDescription { get; set; }
    public string ContactName { get; set; }
    public string ContactEmailAddress { get; set; }
    public Address LocationAddress { get; set; }
    public Address ContactAddress { get; set; }
    public string NotesOnApplying { get; set; }

    /// <summary>Digits of the first "Box N" found in <see cref="NotesOnApplying"/>, or null when there is none.</summary>
    public string BoxNumber { get; set; }

    /// <summary>The id that was passed to the constructor.</summary>
    public string VacancyID { get; set; }

    /// <summary>First date found in the page's <c>#noNavContent</c> paragraph.</summary>
    public DateTime DatePosted { get; set; }

    /// <summary>Second date found in the page's <c>#noNavContent</c> paragraph.</summary>
    public DateTime DateDue { get; set; }

    /// <summary>
    /// Downloads the printable details page for the given vacancy and fills
    /// every property from it.
    /// </summary>
    /// <param name="id">Vacancy id appended to the details-page URL.</param>
    /// <exception cref="InvalidOperationException">
    /// The page does not contain the date paragraph, contains fewer than two
    /// dates, or contains no <c>span.rightCol</c> elements.
    /// </exception>
    /// <exception cref="FormatException">A matched date is not a valid calendar date.</exception>
    public Posting(string id)
    {
        this.VacancyID = id;
        this.LocationAddress = new Address();
        this.ContactAddress = new Address();

        HtmlWeb web = new HtmlWeb();
        HtmlDocument htmlDoc = web.Load(DetailsUrlPrefix + id);

        // The first paragraph under #noNavContent carries the posted and due dates.
        HtmlNode dateNode = htmlDoc.DocumentNode.SelectSingleNode("//*[@id=\"noNavContent\"]/p");
        if (dateNode == null)
        {
            throw new InvalidOperationException("Vacancy page for id '" + id + "' has no date paragraph.");
        }

        MatchCollection dates = ShortDateRegex.Matches(dateNode.InnerHtml);
        if (dates.Count < 2)
        {
            throw new InvalidOperationException("Vacancy page for id '" + id + "' does not list both a posted and a due date.");
        }

        this.DatePosted = ParseShortDate(dates[0].Value);
        this.DateDue = ParseShortDate(dates[1].Value);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        HtmlNodeCollection fields = htmlDoc.DocumentNode.SelectNodes("//span[@class=\"rightCol\"]");
        if (fields == null)
        {
            throw new InvalidOperationException("Vacancy page for id '" + id + "' has no detail fields.");
        }

        for (int index = 0; index < fields.Count; index++)
        {
            this.AssignField(index, fields[index].InnerHtml.Trim());
        }

        this.BoxNumber = ExtractBoxNumber(this.NotesOnApplying);
    }

    /// <summary>
    /// Fills a letter template by substituting the placeholders <c>$name</c>,
    /// <c>$vacancy</c>, <c>$title</c>, <c>$box</c>, <c>$email</c> and <c>$skills</c>.
    /// </summary>
    /// <param name="s">Template text; null is treated as an empty template.</param>
    /// <param name="data">User details; <c>$email</c> is left untouched when this is null or its email is empty.</param>
    /// <param name="skills">Comma-separated skills; each comma becomes a bullet. Null is treated as empty.</param>
    /// <returns>The template with the placeholders replaced.</returns>
    public string GenerateLetter(string s, UserData data, string skills)
    {
        string ret = s ?? string.Empty;

        ret = ret.Replace("$name", this.ContactName);
        ret = ret.Replace("$vacancy", this.VacancyID);
        ret = ret.Replace("$title", this.Title);

        // $box and $email stay in the text when there is nothing to put in their place.
        if (!string.IsNullOrEmpty(this.BoxNumber))
        {
            ret = ret.Replace("$box", this.BoxNumber);
        }

        if (data != null && !string.IsNullOrEmpty(data.email))
        {
            ret = ret.Replace("$email", data.email);
        }

        string skillRet = "• " + (skills ?? string.Empty).Replace(",", "• ");
        ret = ret.Replace("$skills", skillRet);

        return ret;
    }

    // Parses one date matched by ShortDateRegex.
    // NOTE(review): assumes the site prints month/day/two-digit-year — confirm.
    // Parsing with a fixed format keeps the result independent of the machine's culture.
    private static DateTime ParseShortDate(string text)
    {
        return DateTime.ParseExact(text, "MM/dd/yy", CultureInfo.InvariantCulture);
    }

    // Returns the digits of the first "Box N" in the notes, or null when the
    // notes are missing or contain no box reference.
    private static string ExtractBoxNumber(string notes)
    {
        if (string.IsNullOrEmpty(notes))
        {
            return null;
        }

        Match match = BoxRegex.Match(notes);
        return match.Success ? match.Value.Replace("Box ", "") : null;
    }

    // Stores one rightCol span's text in the property that corresponds to its
    // zero-based position on the page; positions not listed here are ignored.
    private void AssignField(int index, string text)
    {
        switch (index)
        {
            case 0: this.Agency = text; break;
            case 1: this.Title = text; break;
            case 2: this.OccupationalCategory = text; break;
            case 3: this.SalaryGrade = text; break;
            // InnerHtml is raw markup, so an ampersand arrives as the entity "&amp;".
            case 4: this.BargainingUnit = text.Replace("&amp;", "&"); break;
            case 5: this.SalaryRange = text; break;
            case 6: this.EmploymentType = text; break;
            case 7: this.AppointmentType = text; break;
            case 8: this.JurisdictionalClass = text; break;
            case 9: this.TravelPercentage = text; break;
            case 19: this.LocationAddress.Street = text; break;
            case 21: this.LocationAddress.City = text; break;
            case 22: this.LocationAddress.State = text; break;
            case 23: this.LocationAddress.ZipCode = text; break;
            case 24: this.MinimumQualifications = text; break;
            case 25: this.DutiesDescription = text; break;
            case 27: this.ContactName = text; break;
            case 30: this.ContactEmailAddress = text; break;
            case 31: this.ContactAddress.Street = text; break;
            case 33: this.ContactAddress.City = text; break;
            case 34: this.ContactAddress.State = text; break;
            case 35: this.ContactAddress.ZipCode = text; break;
            case 36: this.NotesOnApplying = text; break;
            default: break;
        }
    }
}