Before I lose the elements on that point, here is a report of my expirementations.
Problem reproduction
Version : js-libp2p@0.36.2
OS: Windows (with Git-Bash)
The following script, inspired from js-libp2p/examples/peer-and-content-routing/2.js, starts 3 nodes:
- node1: the content provider
- node2: an intermediate node
- node3: the content querier, but contrary to the initial peer-and-content-routing/2.js, this node may not be started yet when node1 announces its content.
Considering that timing effects are involved, it takes 4 arguments:
-
testOption
: Either 1 or 2:
1 = node3 started when node1 announces its content.
2 = node3 started after node1 has announced its content.
-
delay1
: onConnect handlers delay
Waiting time in ms, after the first nodes have been started (with or without node3),
and before node1 announces its content.
-
delay2
: propagation delay
Waiting time in ms, after node1 has announced its content,
and before node3 is started and/or queries for node1’s content.
-
delay3
: onConnect handlers delay again
Applicable when testOption
is 2.
Waiting time in ms, after node3 has been started,
and before node3 queries for node1’s content.
js-libp2p.discuss#1406.js:
/* eslint-disable no-console */
'use strict'
// Analyze arguments.
function usage() {
console.log('USAGE: node %s <test option> <delay1> <delay2> <delay3>', require('path').basename(process.argv[1]));
}
// - #1: Test option - 1 or 2
const testOption = parseInt(process.argv[2] || "0");
if ((testOption !== 1) && (testOption !== 2)) {
usage();
console.log('');
console.log('Please choose option 1 or 2:');
console.log('- Option 1: start and connect node1, node2 and node3 before node1 provides its content');
console.log('- Option 2: start and connect node3 after node1 has provided its content');
process.exit(1);
}
// - #2: delay1
const delay1 = parseInt(process.argv[3] || "100");
// - #3: delay2
const delay2 = parseInt(process.argv[4] || "300");
// - #4: delay3
const delay3 = parseInt(process.argv[5] || "100");
console.log('Test conditions:')
console.log('- test option: %s', testOption);
console.log('- delay1: %s ms (\'Wait for onConnect handlers in the DHT\' for node1, node2 and optionally node3)', delay1);
console.log('- delay2: %s ms (\'wait for propagation\')', delay2);
console.log('- delay3: %s ms (\'Wait for onConnect handlers in the DHT\' for node3 when started after node1 has provided its content', delay3);
/**
* Hack libp2p-kad-dht's logger factory.
* See [js-libp2p-kad-dht#314](https://github.com/libp2p/js-libp2p-kad-dht/issues/314).
*/
(() => {
console.log('Patching libp2p-kad-dht');
const debug = require('debug');
const libp2pKadDhtUtils = require('libp2p-kad-dht/src/utils')
const _fixedFormatterCallback = (p) => {
try {
return p.toB58String();
} catch (_err) {
return p.toString();
}
};
const _defaultLoggerFactory = libp2pKadDhtUtils.logger;
const _hackedLoggerFactory = (name) => {
// Call default implementation.
const _logger = _defaultLoggerFactory(name);
// Fix 'debug' formatter.
debug.formatters.p = _fixedFormatterCallback;
return _logger;
};
libp2pKadDhtUtils.logger = _hackedLoggerFactory;
})();
// Inspired from 'js-libp2p/examples/peer-and-content-routing/2.js'.
const Libp2p = require('libp2p')
const TCP = require('libp2p-tcp')
const Mplex = require('libp2p-mplex')
const { NOISE } = require('@chainsafe/libp2p-noise')
const { CID } = require('multiformats/cid')
const KadDHT = require('libp2p-kad-dht')
const delay = require('delay')
const createNode = async () => {
const node = await Libp2p.create({
addresses: {
listen: ['/ip4/0.0.0.0/tcp/0']
},
modules: {
transport: [TCP],
streamMuxer: [Mplex],
connEncryption: [NOISE],
dht: KadDHT
},
config: {
dht: {
enabled: true
}
}
})
// await node.start()
return node
}
;(async () => {
// Create nodes.
console.log('Creating nodes:');
const [node1, node2, node3] = await Promise.all([
createNode(),
createNode(),
createNode(),
]);
console.log('- node1: %s', node1.peerId.toB58String());
console.log('- node2: %s', node2.peerId.toB58String());
console.log('- node3: %s', node3.peerId.toB58String());
const cid = CID.parse('QmTp9VkYvnHyrqKQuFPiuZkiX9gPcqj6x5LJ1rmWuSySnL');
// Start and connect node1 and node2.
console.log('Starting node1');
await node1.start();
console.log('Starting node2');
await node2.start();
console.log('Dialing node2 from node1');
await node1.peerStore.addressBook.set(node2.peerId, node2.multiaddrs);
await node1.dial(node2.peerId);
// Option 1: start and connect node3.
if (testOption === 1) {
console.log('Option 1: Starting node3 before providing %s with node1', cid.toString());
await node3.start();
await node2.peerStore.addressBook.set(node3.peerId, node3.multiaddrs);
node2.dial(node3.peerId);
} else {
console.log('Option 2: node3 not started before providing %s with node1', cid.toString());
}
console.log('Waiting for delay1 - %s ms', delay1);
await delay(delay1); // Wait for onConnect handlers in the DHT
// Provide content with node1.
console.log('Providing %s with node1', cid.toString());
await node1.contentRouting.provide(cid);
console.log('node1 is providing %s', cid.toString());
console.log('Waiting for delay2 - %s ms', delay2);
await delay(delay2); // wait for propagation
// Option 2: Start and connect node3 if not already started.
if (testOption === 2) {
console.log('Option 2: Starting node3 after node1 provided %s', cid.toString());
await node3.start();
await node2.peerStore.addressBook.set(node3.peerId, node3.multiaddrs);
node2.dial(node3.peerId);
console.log('Waiting for delay3 - %s ms', delay3);
await delay(delay3); // Wait for onConnect handlers in the DHT
}
// Search for node1's content from node3.
const t0 = Date.now();
try {
for await (const provider of node3.contentRouting.findProviders(cid, { timeout: 3000 })) {
console.log('Found provider from node3 in %s ms for %s: %s', Date.now() - t0, cid.toString(), provider.id.toB58String());
console.log('OK');
process.exit(0);
}
} catch (_err) {
console.log('ERROR: No provider found from node3 in %s ms for %s', Date.now() - t0, cid.toString());
process.exit(1);
}
})();
And since it is about timings, and thus random results, the following bash script makes it possible to repeat executions of the script above, and count successes and failures:
repeat.sh:
#!/usr/bin/env bash
_count=0
_success=0
while [ ${_count} -lt 10 ] ; do
$* && _success=$[${_success}+1]
_count=$[_count+1]
echo "---"
echo "Success: ${_success}/${_count}"
echo "---"
done
The following command executes the test scenario in the same conditions as peer-and-content-routing/2.js:
$ ./repeat.sh node js-libp2p.discuss#1406.js 1 100 300 100
Success: 10/10
Keeping the same timings, but having node3 started after node1 has announced its content gives less stable results:
$ ./repeat.sh node js-libp2p.discuss#1406.js 2 100 300 100
Success: 2/10
Experimentations
Test option 1
Default configuration:
./repeat.sh node ./js-libp2p#xxx.js 1 100 300
=> success (score = 10/10)
Let’s try to lower delay1:
./repeat.sh node ./js-libp2p#xxx.js 1 10 300
=> failures occur (score = 5/10)
Actually, delay1 can’t reasonably be less than 50 ms on my configuration to have the thing work reliably:
./repeat.sh node ./js-libp2p#xxx.js 1 50 300
=> success (score = 10/10)
Let’s try to lower delay2 now:
./repeat.sh node ./js-libp2p#xxx.js 1 50 0
=> success (score = 10/10)
To conclude:
-
Some time must be awaited after a connection with a remote peer before a content provider announces the content it provides (~50 ms).
- In this use case, no need to wait more time “for propagation”.
Test option 2:
Default configuration:
./repeat.sh node ./js-libp2p#xxx.js 2 100 300 100
=> failures occur (score = 8/10)
Let’s augment delay2:
./repeat.sh node ./js-libp2p#xxx.js 2 100 10000 100
=> failures still occur (even worse: score = 3/10)
Reset delay2, then let’s augment delay3:
./repeat.sh node ./js-libp2p#xxx.js 2 100 300 200
=> success (score = 10/10)
Lower delay3 as much as possible.
delay3 can’t reasonably be less than 160 ms on my configuration to have the thing work reliably:
./repeat.sh node ./js-libp2p#xxx.js 2 100 300 160
=> success (score = 10/10)
Set delay3 with 200 ms, and lower delay1 and delay2 as learnt with test option 1:
./repeat.sh node ./js-libp2p#xxx.js 2 50 0 150
=> success (score = 10/10)
To conclude:
- In this second use case, the time awaited “for propagation” seems to have no effect neither.
-
The delay that actually has an effect on whether the content discovery succeeds or not is the time awaited by the discoverer after being connected to a remote peer (~200 ms).