Skip to content

Commit

Permalink
Module update to scrap the new ICSD site
Browse files Browse the repository at this point in the history
  • Loading branch information
yannisalexiou committed Mar 15, 2019
1 parent 435501a commit b346081
Show file tree
Hide file tree
Showing 6 changed files with 451 additions and 540 deletions.
180 changes: 89 additions & 91 deletions courses/course.js
Original file line number Diff line number Diff line change
@@ -1,119 +1,117 @@
const request = require('request'); //Helps us make HTTP calls
const cheerio = require('cheerio');

const generalSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)';
const firstFixTableSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(2)>td';
const courseWebsiteSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(5)>td';
const contentOutlineSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(8)>td>p'
const learningOutcomesSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(11)>td>p';
const prerequisitesSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(14)>td>p';
const basicTextbooksSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(17)>td>p';
const additionalReferencesSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(20)>td>p';
const teachingMethodSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(23)>td>p';
const grandingMethodSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(26)>td>p';
const languageOfInstructionSelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(29)>td';
const modeOfDeliverySelector = 'table.wrapper>tbody>tr>td>center:nth-child(10)>table>tbody>tr:nth-child(1)>td>center>table>tbody>tr>td:nth-child(2)>table>tbody>tr:nth-child(32)>td>p';
const generalSelector = 'div.tab-content';
const contentOutlineSelector = 'div.tab-content>div:nth-child(3)>div:nth-child(2)>p'
const learningOutcomesSelector = 'div.tab-content>div:nth-child(4)>div:nth-child(2)>';
const prerequisitesSelector = 'div.tab-content>div:nth-child(5)>div:nth-child(2)>p';
const basicTextbooksSelector = 'div.tab-content>div:nth-child(6)>div:nth-child(2)>';
const additionalReferencesSelector = 'div.tab-content>div:nth-child(7)>div:nth-child(2)>';
const teachingMethodSelector = 'div.tab-content>div:nth-child(8)>div:nth-child(2)>p';
const grandingMethodSelector = 'div.tab-content>div:nth-child(9)>div:nth-child(2)>p';
const languageOfInstructionSelector = 'div.tab-content>div:nth-child(10)>div:nth-child(2)';
const modeOfDeliverySelector = 'div.tab-content>div:nth-child(11)>div:nth-child(2)>p';

function requestCourseDetails(url) {
return new Promise(function (resolve, reject) {
request(url, function (error, res, body) {
if (!error && res.statusCode == 200) {
resolve(body);
} else {
reject(error);
}
return new Promise(function (resolve, reject) {
request(url, function (error, res, body) {
if (!error && res.statusCode == 200) {
resolve(body);
} else {
reject(error);
}
});
});
});
}

async function getAllCourseDetails(url) {
let html = await requestCourseDetails(url);
var allCourseDetailsList = allCourseDetails(html, url);
return allCourseDetailsList
let html = await requestCourseDetails(url);
var allCourseDetailsList = allCourseDetails(html, url);
return allCourseDetailsList
}

async function getBasicCourseDetails(url) {
let html = await requestCourseDetails(url);
var basicCourseDetailsList = basicCourseData(html, url);
return basicCourseDetailsList;
let html = await requestCourseDetails(url);
var basicCourseDetailsList = basicCourseData(html, url);
return basicCourseDetailsList;
}

async function getAdvancedCourseDetails(url) {
let html = await requestCourseDetails(url);
var advancedCourseDetailsList = advancedCourseData(html);
return advancedCourseDetailsList;
let html = await requestCourseDetails(url);
var advancedCourseDetailsList = advancedCourseData(html, url);
return advancedCourseDetailsList;
}

function allCourseDetails(html, url) {
var basicData = basicCourseData(html, url);
var advancedData = advancedCourseData(html);
var basicData = basicCourseData(html, url);
var advancedData = advancedCourseData(html, url);

var finalData = Object.assign(basicData, advancedData);
var finalData = Object.assign(basicData, advancedData);

return finalData;
return finalData;
}

function basicCourseData(html, url) {
var $ = cheerio.load(html);
var firstFixTable = $('table.mathima>tbody>tr').filter(function() {
var data = $(this);
return data;
});

var title = firstFixTable.children().children().eq(1).text();
var code = firstFixTable.children().children().eq(3).text();
var semester = firstFixTable.children().children().eq(5).text();
var ects = firstFixTable.children().children().eq(7).text();
var theoryHours = firstFixTable.children().children().eq(9).text();
var labHours = firstFixTable.children().children().eq(11).text();
var professor = firstFixTable.children().children().eq(13).text();

var basicData = {
title: title,
code: code,
semester: semester,
ects: ects,
theoryHours: theoryHours,
labHours: labHours,
professor: professor,
link: url
}

return basicData;
var $ = cheerio.load(html);
var firstFixTable = $('table.table>tbody').filter(function () {
var data = $(this);
return data;
});

var title = firstFixTable.children().children().eq(1).text();
var code = firstFixTable.children().children().eq(3).text();
var semester = firstFixTable.children().children().eq(5).text();
var ects = firstFixTable.children().children().eq(7).text();
var theoryHours = firstFixTable.children().children().eq(9).text();
var labHours = firstFixTable.children().children().eq(11).text();
var professor = firstFixTable.children().children().eq(13).text();

var basicData = {
title: title,
code: code,
semester: semester,
ects: ects,
theoryHours: theoryHours,
labHours: labHours,
professor: professor,
link: url
}

return basicData;
}

function advancedCourseData(html) {
var $ = cheerio.load(html);

var courseWebsite = $(courseWebsiteSelector).text();
var contentOutline = $(contentOutlineSelector).text();
var learningOutcomes = $(learningOutcomesSelector).text();
var prerequisites = $(prerequisitesSelector).text();
var basicTextbooks = $(basicTextbooksSelector).text();
var additionalReferences = $(additionalReferencesSelector).text();
var teachingMethod = $(teachingMethodSelector).text();
var grandingMethod = $(grandingMethodSelector).text();
var languageOfInstruction = $(languageOfInstructionSelector).text();
var modeOfDelivery = $(modeOfDeliverySelector).text();

var advancedData = {
courseWebsite: courseWebsite,
contentOutline: contentOutline,
learningOutcomes: learningOutcomes,
prerequisites: prerequisites,
basicTextbooks: basicTextbooks,
additionalReferences: additionalReferences,
teachingMethod: teachingMethod,
grandingMethod: grandingMethod,
languageOfInstruction: languageOfInstruction,
modeOfDelivery: modeOfDelivery
}

return advancedData;
function advancedCourseData(html, url) {
var $ = cheerio.load(html);

var courseWebsite = url;
var contentOutline = $(contentOutlineSelector).text();
var learningOutcomes = $(learningOutcomesSelector).text();
var prerequisites = $(prerequisitesSelector).text();
var basicTextbooks = $(basicTextbooksSelector).text();
var additionalReferences = $(additionalReferencesSelector).text();
var teachingMethod = $(teachingMethodSelector).text();
var grandingMethod = $(grandingMethodSelector).text();
var languageOfInstruction = $(languageOfInstructionSelector).text();
var modeOfDelivery = $(modeOfDeliverySelector).text();

var advancedData = {
courseWebsite: courseWebsite,
contentOutline: contentOutline,
learningOutcomes: learningOutcomes,
prerequisites: prerequisites,
basicTextbooks: basicTextbooks,
additionalReferences: additionalReferences,
teachingMethod: teachingMethod,
grandingMethod: grandingMethod,
languageOfInstruction: languageOfInstruction,
modeOfDelivery: modeOfDelivery
}

return advancedData;
}

module.exports = {
getAllCourseDetails,
getBasicCourseDetails,
getAdvancedCourseDetails
};
getAllCourseDetails,
getBasicCourseDetails,
getAdvancedCourseDetails
};
Loading

0 comments on commit b346081

Please sign in to comment.