-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdigital.gov-scan-upload.js
231 lines (197 loc) · 8.45 KB
/
digital.gov-scan-upload.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/*
* GitHub Repo Tracker Script
*
* Description:
* This Node.js script downloads a CSV file from a specified URL, renames it with the current date,
* and uploads its contents to a new Google Sheets document. The script also updates an "Introduction" sheet
* within the Google Sheets document to keep track of the new sheet names. The Google Sheets API is used
* to handle authentication and data operations.
*
* Features:
* - Downloads a CSV file from a given URL.
* - Renames the downloaded file with the current date.
* - Authenticates and uploads the CSV data to a new Google Sheets document.
* - Ensures unique sheet titles to prevent overwrites.
* - Updates an "Introduction" sheet with the title of the newly created sheet.
*
* This script is intended to run on a weekly cron job to automate data collection and organization.
*
* License:
* This script is licensed under the GNU General Public License v3.0.
* You can redistribute it and/or modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along with this program. If not, see
* <https://www.gnu.org/licenses/>.
*/
const axios = require('axios');
const { google } = require('googleapis');
const fs = require('fs');
const path = require('path');
const { parse } = require('csv-parse/sync');
const baseDir = path.resolve(__dirname);
const RESULTS_DIR = path.join(baseDir, "results");
const CREDENTIALS_PATH = path.join(baseDir, "credentials.json");
const TOKEN_PATH = path.join(baseDir, "token.json");
console.log('Reading credentials from:', CREDENTIALS_PATH);
console.log('File exists:', fs.existsSync(CREDENTIALS_PATH));
console.log("Environment variables:", process.env);
const downloadAndRenameFile = async (url) => {
console.log('Downloading file from:', url);
const response = await axios({
url,
method: 'GET',
responseType: 'stream'
});
const date = new Date().toISOString().slice(0, 10); // YYYY-MM-DD
const filePath = path.join(__dirname, `weekly-snapshot-${date}.csv`);
console.log('Renaming downloaded file to:', filePath);
const writer = fs.createWriteStream(filePath);
response.data.pipe(writer);
return new Promise((resolve, reject) => {
writer.on('finish', () => {
console.log('File download and rename finished');
resolve(filePath);
});
writer.on('error', (error) => {
console.error('Error in file download and rename:', error);
reject(error);
});
});
};
async function authenticateGoogleSheets(credentialsPath) {
console.log('Authenticating Google Sheets with credentials from:', credentialsPath);
const content = await fs.promises.readFile(credentialsPath, 'utf8');
const credentials = JSON.parse(content);
const { client_secret, client_id, redirect_uris } = credentials.installed;
const oAuth2Client = new google.auth.OAuth2(client_id, client_secret, redirect_uris[0]);
let token;
try {
console.log('Reading token from:', TOKEN_PATH);
token = await fs.promises.readFile(TOKEN_PATH, 'utf8');
} catch (error) {
console.error('Token file not found, requesting new token:', error);
return getNewToken(oAuth2Client);
}
oAuth2Client.setCredentials(JSON.parse(token));
console.log('Google Sheets authenticated successfully');
return oAuth2Client;
}
function getNewToken(oAuth2Client) {
const authUrl = oAuth2Client.generateAuthUrl({
access_type: 'offline',
scope: ['https://www.googleapis.com/auth/spreadsheets'],
});
console.log('Authorize this app by visiting this URL:', authUrl);
// Further implementation needed to handle the new token
// Typically involves a web server to receive the response
// Handle errors
try {
// Code that may throw an error
} catch (error) {
console.error('Error in token generation:', error);
// Handle the error appropriately
}
}
function cleanCell(cell) {
let cleanedCell = cell.replace(/^"|"$/g, "").trim(); // Remove surrounding quotes
cleanedCell = cleanedCell.replace(/""/g, '"'); // Replace double quotes with single
return cleanedCell;
}
const uploadToGoogleSheet = async (filePath, sheets, spreadsheetId) => {
console.log('Uploading data to Google Sheets from file:', filePath);
const content = fs.readFileSync(filePath, 'utf8');
// Parse the CSV content correctly handling commas within quotes
const records = parse(content, {
columns: false,
skip_empty_lines: true,
});
const rows = records.map(row => row.map(cell => cleanCell(cell)));
const today = new Date();
let sheetTitle = `${today.getFullYear()}-${(today.getMonth() + 1).toString().padStart(2, "0")}-${today.getDate().toString().padStart(2, "0")}`;
console.log('Checking if sheet with title already exists:', sheetTitle);
// Check if the sheet already exists
const sheetExists = await sheets.spreadsheets.get({
spreadsheetId
}).then(res => {
return res.data.sheets.some(sheet => sheet.properties.title === sheetTitle);
});
if (sheetExists) {
console.log(`Sheet with title "${sheetTitle}" already exists. Generating a new title.`);
sheetTitle = `${sheetTitle}-${Date.now()}`; // Append timestamp to make title unique
}
console.log('Creating new sheet with title:', sheetTitle);
// Create a new sheet with the unique title
await sheets.spreadsheets.batchUpdate({
spreadsheetId,
resource: {
requests: [
{
addSheet: {
properties: {
title: sheetTitle,
},
},
},
],
},
});
console.log('Appending data to new sheet:', sheetTitle);
// Append the data to the new sheet
await sheets.spreadsheets.values.append({
spreadsheetId,
range: `${sheetTitle}!A1`,
valueInputOption: 'USER_ENTERED',
resource: {
values: rows
}
});
console.log('Data uploaded to Google Sheets successfully');
return sheetTitle;
};
const updateIntroductionSheet = async (sheets, spreadsheetId, sheetTitle) => {
console.log('Updating the "Introduction" sheet with new data');
// Get the data in column G of the "Introduction" sheet
const range = 'Introduction!G:G';
const response = await sheets.spreadsheets.values.get({
spreadsheetId,
range: range,
});
const values = response.data.values || [];
const lastRow = values.length + 1;
// Update the last empty cell in column G with the new sheet title
await sheets.spreadsheets.values.update({
spreadsheetId,
range: `Introduction!G${lastRow}`,
valueInputOption: 'USER_ENTERED',
resource: {
values: [[sheetTitle]]
}
});
console.log(`"Introduction" sheet updated successfully with ${sheetTitle} at row ${lastRow}`);
};
const main = async () => {
const fileUrl = 'https://api.gsa.gov/technology/site-scanning/data/weekly-snapshot.csv';
const spreadsheetId = '1CsXAzCzghYYwXzGCcrJqrsWpr5f7MbID2Qw6vQvi3sQ'; // Replace with your spreadsheet ID
try {
console.log('Starting file download and rename process');
const filePath = await downloadAndRenameFile(fileUrl);
console.log('File downloaded and renamed:', filePath);
console.log('Starting Google Sheets authentication process');
const auth = await authenticateGoogleSheets(CREDENTIALS_PATH);
const sheets = google.sheets({ version: 'v4', auth });
console.log('Starting data upload to Google Sheets');
const sheetTitle = await uploadToGoogleSheet(filePath, sheets, spreadsheetId);
console.log('File uploaded successfully');
console.log('Updating the "Introduction" sheet');
await updateIntroductionSheet(sheets, spreadsheetId, sheetTitle);
console.log('Introduction sheet updated successfully');
} catch (error) {
console.error('Error in main process:', error);
}
};
main();