Track run date and input paths for fb and flickr importers (#2673)

Track run date and input paths for fb and flickr importers
This commit is contained in:
Aaron Boodman
2016-10-06 18:52:30 -07:00
committed by GitHub
parent 71b9fd9337
commit 53d5b59017
6 changed files with 113 additions and 15 deletions

View File

@@ -6,6 +6,7 @@
import {assert} from 'chai';
import {suite, test} from 'mocha';
import {invariant} from './assert.js';
import {getHash} from './get-hash.js';
import List from './list.js';
import {DatabaseSpec, DatasetSpec, PathSpec} from './specs.js';
@@ -100,6 +101,7 @@ suite('Specs', () => {
const spec = DatabaseSpec.parse(tc.spec);
assert.strictEqual(spec.scheme, tc.scheme);
assert.strictEqual(spec.path, tc.path);
assert.strictEqual(tc.spec, spec.toString());
}
});
@@ -154,6 +156,7 @@ suite('Specs', () => {
assert.strictEqual(tc.scheme, scheme);
assert.strictEqual(tc.path, path);
assert.strictEqual(tc.name, spec.name);
assert.strictEqual(tc.spec, spec.toString());
}
});
@@ -189,6 +192,34 @@ suite('Specs', () => {
assert.strictEqual(tc.scheme, scheme);
assert.strictEqual(tc.dbPath, path);
assert.strictEqual(tc.pathStr, spec.path.toString());
assert.strictEqual(tc.spec, spec.toString());
}
});
// Checks that a pinned PathSpec keeps resolving to the commit that was HEAD
// when pin() was called, while the unpinned spec follows the dataset.
test('PathSpec.pin', async () => {
  const memSpec = DatabaseSpec.parse('mem');
  const database = memSpec.database();
  let dataset = await database.commit(database.getDataset('foo'), 42);

  const floating = PathSpec.parse('mem::foo.value');
  // NOTE(review): presumably swapped in so the spec resolves against the
  // database created above rather than a fresh one - confirm.
  floating.database = memSpec;

  const frozen = await floating.pin();
  invariant(frozen);
  const frozenHash = frozen.path.hash;
  invariant(frozenHash);

  const head = await dataset.head();
  invariant(head);
  const headHash = head.hash.toString();

  // The pinned spec is rooted at the hash of the current head commit.
  assert.strictEqual(headHash, frozenHash.toString());
  assert.strictEqual(`mem::#${head.hash.toString()}.value`, frozen.toString());

  // Before any further commit, both specs resolve to the same value.
  const [, frozenBefore] = await frozen.value();
  assert.strictEqual(42, frozenBefore);
  const [, floatingBefore] = await floating.value();
  assert.strictEqual(42, floatingBefore);

  // After a new commit only the unpinned spec sees the new value.
  dataset = await database.commit(dataset, 43);
  const [, frozenAfter] = await frozen.value();
  assert.strictEqual(42, frozenAfter);
  const [, floatingAfter] = await floating.value();
  assert.strictEqual(43, floatingAfter);
});
});

View File

@@ -85,6 +85,18 @@ export class DatabaseSpec {
throw new Error('Unreached');
}
}
toString(): string {
switch (this.scheme) {
case 'mem':
return this.scheme;
case 'http':
case 'https':
return `${this.scheme}:${this.path}`;
default:
throw new Error('Unreached');
}
}
}
/**
@@ -131,6 +143,10 @@ export class DatasetSpec {
const [db, ds] = this.dataset();
return ds.head().then(commit => [db, commit ? commit.value : null]);
}
toString(): string {
return `${this.database.toString()}::${this.name}`;
}
}
/**
@@ -157,6 +173,23 @@ export class PathSpec {
this.path = path;
}
/**
* Returns a new PathSpec in which the dataset component, if any, has been
* replaced with the hash of the HEAD of that dataset. This "pins" the path
* to the state of the database at the current moment in time.
*/
async pin(): Promise<?PathSpec> {
if (this.path.dataset !== '') {
const ds = this.database.database().getDataset(this.path.dataset);
const commit = await ds.head();
if (!commit) {
return null;
}
return new PathSpec(this.database,
new AbsolutePath('', commit.hash, this.path.path));
}
}
/**
* Resolves this PathSpec, and returns the database it was resolved in, and the value it
* resolved to. If the value wasn't found, it will be `null`.
@@ -167,6 +200,10 @@ export class PathSpec {
const db = this.database.database();
return this.path.resolve(db).then(value => [db, value]);
}
toString(): string {
return `${this.database.toString()}::${this.path.toString()}`;
}
}
function splitAndParseDatabaseSpec(str: string): [DatabaseSpec, string] {

View File

@@ -8,6 +8,7 @@ import argv from 'yargs';
import {
createStructClass,
DatasetSpec,
PathSpec,
isSubtype,
makeListType,
makeStructType,
@@ -23,7 +24,7 @@ import {
const args = argv
.usage(
'Finds photos in slurped Facebook metadata\n\n' +
'Usage: node . <in-dataset> <out-dataset>')
'Usage: node . <in-path> <out-dataset>')
.demand(2)
.argv;
@@ -69,10 +70,14 @@ const NomsDate = createStructClass(
makeStructType('Date', {nsSinceEpoch: numberType}));
async function main(): Promise<void> {
const inSpec = DatasetSpec.parse(args._[0]);
const [db, input] = await inSpec.value();
const inSpec = PathSpec.parse(args._[0]);
const pinnedSpec = await inSpec.pin();
if (!pinnedSpec) {
throw `Invalid input dataset: ${inSpec.path.dataset}`;
}
const [db, input] = await pinnedSpec.value();
if (!input) {
return db.close();
throw `Invalid input spec: ${inSpec.toString()}`;
}
const outSpec = DatasetSpec.parse(args._[1]);
const [outDB, output] = outSpec.dataset();
@@ -99,9 +104,14 @@ async function main(): Promise<void> {
}
});
return outDB.commit(output, await result)
.then(() => db.close())
.then(() => outDB.close());
return outDB.commit(output, await result, {
meta: newStruct('', {
date: new Date().toISOString(),
input: pinnedSpec.toString(),
}),
})
.then(() => db.close())
.then(() => outDB.close());
}
function getGeo(input): Struct {

View File

@@ -104,7 +104,14 @@ async function main(): Promise<void> {
getPhotos(),
// TODO: Add more object types here
]);
await db.commit(out, newStruct('', {user, photos}));
await db.commit(out, newStruct('', {
user,
photos,
}), {
meta: newStruct('', {
date: new Date().toISOString(),
}),
});
process.stdout.write(clearLine);
return;
}

View File

@@ -7,6 +7,7 @@
import argv from 'yargs';
import {
DatasetSpec,
PathSpec,
getTypeOfValue,
isSubtype,
makeStructType,
@@ -76,10 +77,14 @@ main().catch(ex => {
});
async function main(): Promise<void> {
const inSpec = DatasetSpec.parse(args._[0]);
const [db, input] = await inSpec.value();
const inSpec = PathSpec.parse(args._[0]);
const pinnedSpec = await inSpec.pin();
if (!pinnedSpec) {
throw `Input dataset ${inSpec.path.dataset} does not exist`;
}
const [db, input] = await pinnedSpec.value();
if (!input) {
return db.close();
throw `Input spec ${args._[0]} does not exist`;
}
const outSpec = DatasetSpec.parse(args._[1]);
const [outDB, output] = outSpec.dataset();
@@ -112,9 +117,14 @@ async function main(): Promise<void> {
return false;
});
return outDB.commit(output, await result)
.then(() => db.close())
.then(() => outDB.close());
return outDB.commit(output, await result, {
meta: newStruct('', {
data: new Date().toISOString(),
input: pinnedSpec.toString(),
}),
})
.then(() => db.close())
.then(() => outDB.close());
}
function getGeo(input: Object): Struct {

View File

@@ -86,7 +86,10 @@ async function main(): Promise<void> {
return db.commit(out, newStruct('', {
photosetsMeta: jsonToNoms(photosetsJSON),
photosets: await photosets,
})).then(() => db.close());
}), {
meta: newStruct('', {date: new Date().toISOString()}),
})
.then(() => db.close());
}
async function getPhotosetsJSON(): Promise<any> {