Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
adam.huang
go-libp2p
Commits
4ae3510f
Commit
4ae3510f
authored
8 years ago
by
Jeromy Johnson
Committed by
GitHub
8 years ago
Browse files
Options
Download
Plain Diff
Merge pull request #107 from libp2p/feat/better-dialsync
Improve swarm dial sync code
parents
d3f70d18
126e1506
master
2018-Q4-OKR
docs-improvements
feat/p2p-multiaddr
feat/pnet/working3
feat/protobuf
feat/relay-integrate
feat/udp
feat/update/go-reuseport
feature/standardize-readme
fix/473
fix/no-custom-field
fix/reset-ping-stream
fix/revert-correct-external-addr
gx/update-jccl6u
gx/update-nza0mn
jenkinsfile
kevina/fix-go-vet
multistream-ping
punching
revert-276-update-go-detect-race
v6.0.23
v6.0.22
v6.0.21
v6.0.20
v6.0.19
v6.0.18
v6.0.17
v6.0.16
v6.0.15
v6.0.14
v6.0.13
v6.0.12
v6.0.11
v6.0.10
v6.0.9
v6.0.8
v6.0.7
v6.0.6
v6.0.5
v6.0.4
v6.0.3
v6.0.2
v6.0.1
v6.0.0
v5.0.21
v5.0.20
v5.0.19
v5.0.18
v5.0.17
v5.0.16
v5.0.15
v5.0.14
v5.0.13
v5.0.12
v5.0.11
v5.0.10
v5.0.9
v5.0.8
v5.0.7
v5.0.6
v5.0.5
v5.0.4
v5.0.3
v5.0.2
v5.0.1
v5.0.0
v4.5.5
v4.5.4
v4.5.3
v4.5.2
v4.5.1
v4.5.0
v4.4.5
v4.4.4
v4.4.3
v4.4.2
v4.4.1
v4.4.0
v4.3.12
v4.3.11
v4.3.10
v4.3.9
v4.3.8
v4.3.7
v4.3.6
v4.3.5
v4.3.4
v4.3.3
v4.3.2
v4.3.1
v4.3.0
v4.2.0
v4.1.0
v4.0.4
v4.0.3
v4.0.2
v4.0.1
v4.0.0
v3.6.0
v3.5.4
v3.5.3
No related merge requests found
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
p2p/net/swarm/dial_sync.go
+92
-0
p2p/net/swarm/dial_sync.go
p2p/net/swarm/dial_sync_test.go
+203
-0
p2p/net/swarm/dial_sync_test.go
p2p/net/swarm/dial_test.go
+0
-1
p2p/net/swarm/dial_test.go
p2p/net/swarm/limiter.go
+1
-1
p2p/net/swarm/limiter.go
p2p/net/swarm/limiter_test.go
+1
-1
p2p/net/swarm/limiter_test.go
p2p/net/swarm/swarm.go
+3
-2
p2p/net/swarm/swarm.go
p2p/net/swarm/swarm_dial.go
+28
-122
p2p/net/swarm/swarm_dial.go
with
328 additions
and
127 deletions
+328
-127
p2p/net/swarm/dial_sync.go
0 → 100644
View file @
4ae3510f
package
swarm
import
(
"context"
"sync"
peer
"github.com/ipfs/go-libp2p-peer"
)
type
DialFunc
func
(
context
.
Context
,
peer
.
ID
)
(
*
Conn
,
error
)
func
NewDialSync
(
dfn
DialFunc
)
*
DialSync
{
return
&
DialSync
{
dials
:
make
(
map
[
peer
.
ID
]
*
activeDial
),
dialFunc
:
dfn
,
}
}
type
DialSync
struct
{
dials
map
[
peer
.
ID
]
*
activeDial
dialsLk
sync
.
Mutex
dialFunc
DialFunc
}
type
activeDial
struct
{
id
peer
.
ID
refCnt
int
refCntLk
sync
.
Mutex
cancel
func
()
err
error
conn
*
Conn
waitch
chan
struct
{}
ds
*
DialSync
}
func
(
dr
*
activeDial
)
wait
(
ctx
context
.
Context
)
(
*
Conn
,
error
)
{
defer
dr
.
decref
()
select
{
case
<-
dr
.
waitch
:
return
dr
.
conn
,
dr
.
err
case
<-
ctx
.
Done
()
:
return
nil
,
ctx
.
Err
()
}
}
func
(
ad
*
activeDial
)
incref
()
{
ad
.
refCntLk
.
Lock
()
defer
ad
.
refCntLk
.
Unlock
()
ad
.
refCnt
++
}
func
(
ad
*
activeDial
)
decref
()
{
ad
.
refCntLk
.
Lock
()
defer
ad
.
refCntLk
.
Unlock
()
ad
.
refCnt
--
if
ad
.
refCnt
<=
0
{
ad
.
cancel
()
ad
.
ds
.
dialsLk
.
Lock
()
delete
(
ad
.
ds
.
dials
,
ad
.
id
)
ad
.
ds
.
dialsLk
.
Unlock
()
}
}
func
(
ds
*
DialSync
)
DialLock
(
ctx
context
.
Context
,
p
peer
.
ID
)
(
*
Conn
,
error
)
{
ds
.
dialsLk
.
Lock
()
actd
,
ok
:=
ds
.
dials
[
p
]
if
!
ok
{
ctx
,
cancel
:=
context
.
WithCancel
(
context
.
Background
())
actd
=
&
activeDial
{
id
:
p
,
cancel
:
cancel
,
waitch
:
make
(
chan
struct
{}),
ds
:
ds
,
}
ds
.
dials
[
p
]
=
actd
go
func
(
ctx
context
.
Context
,
p
peer
.
ID
,
ad
*
activeDial
)
{
ad
.
conn
,
ad
.
err
=
ds
.
dialFunc
(
ctx
,
p
)
close
(
ad
.
waitch
)
ad
.
cancel
()
ad
.
waitch
=
nil
// to ensure nobody tries reusing this
}(
ctx
,
p
,
actd
)
}
actd
.
incref
()
ds
.
dialsLk
.
Unlock
()
return
actd
.
wait
(
ctx
)
}
This diff is collapsed.
Click to expand it.
p2p/net/swarm/dial_sync_test.go
0 → 100644
View file @
4ae3510f
package
swarm
import
(
"context"
"fmt"
"sync"
"testing"
"time"
peer
"github.com/ipfs/go-libp2p-peer"
)
func
getMockDialFunc
()
(
DialFunc
,
func
(),
context
.
Context
,
<-
chan
struct
{})
{
dfcalls
:=
make
(
chan
struct
{},
512
)
// buffer it large enough that we won't care
dialctx
,
cancel
:=
context
.
WithCancel
(
context
.
Background
())
ch
:=
make
(
chan
struct
{})
f
:=
func
(
ctx
context
.
Context
,
p
peer
.
ID
)
(
*
Conn
,
error
)
{
dfcalls
<-
struct
{}{}
defer
cancel
()
select
{
case
<-
ch
:
return
new
(
Conn
),
nil
case
<-
ctx
.
Done
()
:
return
nil
,
ctx
.
Err
()
}
}
o
:=
new
(
sync
.
Once
)
return
f
,
func
()
{
o
.
Do
(
func
()
{
close
(
ch
)
})
},
dialctx
,
dfcalls
}
func
TestBasicDialSync
(
t
*
testing
.
T
)
{
df
,
done
,
_
,
callsch
:=
getMockDialFunc
()
dsync
:=
NewDialSync
(
df
)
p
:=
peer
.
ID
(
"testpeer"
)
ctx
:=
context
.
Background
()
finished
:=
make
(
chan
struct
{})
go
func
()
{
_
,
err
:=
dsync
.
DialLock
(
ctx
,
p
)
if
err
!=
nil
{
t
.
Error
(
err
)
}
finished
<-
struct
{}{}
}()
go
func
()
{
_
,
err
:=
dsync
.
DialLock
(
ctx
,
p
)
if
err
!=
nil
{
t
.
Error
(
err
)
}
finished
<-
struct
{}{}
}()
// short sleep just to make sure we've moved around in the scheduler
time
.
Sleep
(
time
.
Millisecond
*
20
)
done
()
<-
finished
<-
finished
if
len
(
callsch
)
>
1
{
t
.
Fatal
(
"should only have called dial func once!"
)
}
}
func
TestDialSyncCancel
(
t
*
testing
.
T
)
{
df
,
done
,
_
,
dcall
:=
getMockDialFunc
()
dsync
:=
NewDialSync
(
df
)
p
:=
peer
.
ID
(
"testpeer"
)
ctx1
,
cancel1
:=
context
.
WithCancel
(
context
.
Background
())
finished
:=
make
(
chan
struct
{})
go
func
()
{
_
,
err
:=
dsync
.
DialLock
(
ctx1
,
p
)
if
err
!=
ctx1
.
Err
()
{
t
.
Error
(
"should have gotten context error"
)
}
finished
<-
struct
{}{}
}()
// make sure the above makes it through the wait code first
select
{
case
<-
dcall
:
case
<-
time
.
After
(
time
.
Second
)
:
t
.
Fatal
(
"timed out waiting for dial to start"
)
}
// Add a second dialwait in so two actors are waiting on the same dial
go
func
()
{
_
,
err
:=
dsync
.
DialLock
(
context
.
Background
(),
p
)
if
err
!=
nil
{
t
.
Error
(
err
)
}
finished
<-
struct
{}{}
}()
time
.
Sleep
(
time
.
Millisecond
*
20
)
// cancel the first dialwait, it should not affect the second at all
cancel1
()
select
{
case
<-
finished
:
case
<-
time
.
After
(
time
.
Second
)
:
t
.
Fatal
(
"timed out waiting for wait to exit"
)
}
// short sleep just to make sure we've moved around in the scheduler
time
.
Sleep
(
time
.
Millisecond
*
20
)
done
()
<-
finished
}
func
TestDialSyncAllCancel
(
t
*
testing
.
T
)
{
df
,
done
,
dctx
,
_
:=
getMockDialFunc
()
dsync
:=
NewDialSync
(
df
)
p
:=
peer
.
ID
(
"testpeer"
)
ctx1
,
cancel1
:=
context
.
WithCancel
(
context
.
Background
())
finished
:=
make
(
chan
struct
{})
go
func
()
{
_
,
err
:=
dsync
.
DialLock
(
ctx1
,
p
)
if
err
!=
ctx1
.
Err
()
{
t
.
Error
(
"should have gotten context error"
)
}
finished
<-
struct
{}{}
}()
// Add a second dialwait in so two actors are waiting on the same dial
go
func
()
{
_
,
err
:=
dsync
.
DialLock
(
ctx1
,
p
)
if
err
!=
ctx1
.
Err
()
{
t
.
Error
(
"should have gotten context error"
)
}
finished
<-
struct
{}{}
}()
cancel1
()
for
i
:=
0
;
i
<
2
;
i
++
{
select
{
case
<-
finished
:
case
<-
time
.
After
(
time
.
Second
)
:
t
.
Fatal
(
"timed out waiting for wait to exit"
)
}
}
// the dial should have exited now
select
{
case
<-
dctx
.
Done
()
:
case
<-
time
.
After
(
time
.
Second
)
:
t
.
Fatal
(
"timed out waiting for dial to return"
)
}
// should be able to successfully dial that peer again
done
()
_
,
err
:=
dsync
.
DialLock
(
context
.
Background
(),
p
)
if
err
!=
nil
{
t
.
Fatal
(
err
)
}
}
func
TestFailFirst
(
t
*
testing
.
T
)
{
var
count
int
f
:=
func
(
ctx
context
.
Context
,
p
peer
.
ID
)
(
*
Conn
,
error
)
{
if
count
>
0
{
return
new
(
Conn
),
nil
}
count
++
return
nil
,
fmt
.
Errorf
(
"gophers ate the modem"
)
}
ds
:=
NewDialSync
(
f
)
p
:=
peer
.
ID
(
"testing"
)
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
time
.
Second
*
5
)
defer
cancel
()
_
,
err
:=
ds
.
DialLock
(
ctx
,
p
)
if
err
==
nil
{
t
.
Fatal
(
"expected gophers to have eaten the modem"
)
}
c
,
err
:=
ds
.
DialLock
(
ctx
,
p
)
if
err
!=
nil
{
t
.
Fatal
(
err
)
}
if
c
==
nil
{
t
.
Fatal
(
"should have gotten a 'real' conn back"
)
}
}
This diff is collapsed.
Click to expand it.
p2p/net/swarm/dial_test.go
View file @
4ae3510f
...
...
@@ -415,7 +415,6 @@ func TestDialBackoff(t *testing.T) {
if
!
s1
.
backf
.
Backoff
(
s3p
)
{
t
.
Error
(
"s3 should be on backoff"
)
}
}
}
...
...
This diff is collapsed.
Click to expand it.
p2p/net/swarm/limiter.go
View file @
4ae3510f
package
swarm
import
(
"context"
"sync"
peer
"github.com/ipfs/go-libp2p-peer"
ma
"github.com/jbenet/go-multiaddr"
context
"golang.org/x/net/context"
conn
"github.com/libp2p/go-libp2p/p2p/net/conn"
addrutil
"github.com/libp2p/go-libp2p/p2p/net/swarm/addr"
...
...
This diff is collapsed.
Click to expand it.
p2p/net/swarm/limiter_test.go
View file @
4ae3510f
package
swarm
import
(
"context"
"fmt"
"math/rand"
"strconv"
...
...
@@ -10,7 +11,6 @@ import (
peer
"github.com/ipfs/go-libp2p-peer"
ma
"github.com/jbenet/go-multiaddr"
mafmt
"github.com/whyrusleeping/mafmt"
context
"golang.org/x/net/context"
conn
"github.com/libp2p/go-libp2p/p2p/net/conn"
)
...
...
This diff is collapsed.
Click to expand it.
p2p/net/swarm/swarm.go
View file @
4ae3510f
...
...
@@ -3,6 +3,7 @@
package
swarm
import
(
"context"
"fmt"
"io/ioutil"
"os"
...
...
@@ -32,7 +33,6 @@ import (
yamux
"github.com/whyrusleeping/go-smux-yamux"
mafilter
"github.com/whyrusleeping/multiaddr-filter"
ws
"github.com/whyrusleeping/ws-transport"
context
"golang.org/x/net/context"
)
var
log
=
logging
.
Logger
(
"swarm2"
)
...
...
@@ -76,7 +76,7 @@ type Swarm struct {
peers
pstore
.
Peerstore
connh
ConnHandler
dsync
d
ial
s
ync
dsync
*
D
ial
S
ync
backf
dialbackoff
dialT
time
.
Duration
// mainly for tests
...
...
@@ -134,6 +134,7 @@ func NewSwarm(ctx context.Context, listenAddrs []ma.Multiaddr,
dialer
:
conn
.
NewDialer
(
local
,
peers
.
PrivKey
(
local
),
wrap
),
}
s
.
dsync
=
NewDialSync
(
s
.
doDial
)
s
.
limiter
=
newDialLimiter
(
s
.
dialAddr
)
// configure Swarm
...
...
This diff is collapsed.
Click to expand it.
p2p/net/swarm/swarm_dial.go
View file @
4ae3510f
package
swarm
import
(
"context"
"errors"
"fmt"
"sync"
...
...
@@ -11,7 +12,6 @@ import (
ma
"github.com/jbenet/go-multiaddr"
conn
"github.com/libp2p/go-libp2p/p2p/net/conn"
addrutil
"github.com/libp2p/go-libp2p/p2p/net/swarm/addr"
context
"golang.org/x/net/context"
)
// Diagram of dial sync:
...
...
@@ -53,78 +53,6 @@ const defaultPerPeerRateLimit = 8
// subcomponent of Dial)
var
DialTimeout
=
time
.
Second
*
10
// dialsync is a small object that helps manage ongoing dials.
// this way, if we receive many simultaneous dial requests, one
// can do its thing, while the rest wait.
//
// this interface is so would-be dialers can just:
//
// for {
// c := findConnectionToPeer(peer)
// if c != nil {
// return c
// }
//
// // ok, no connections. should we dial?
// if ok, wait := dialsync.Lock(peer); !ok {
// <-wait // can optionally wait
// continue
// }
// defer dialsync.Unlock(peer)
//
// c := actuallyDial(peer)
// return c
// }
//
type
dialsync
struct
{
// ongoing is a map of tickets for the current peers being dialed.
// this way, we dont kick off N dials simultaneously.
ongoing
map
[
peer
.
ID
]
chan
struct
{}
lock
sync
.
Mutex
}
// Lock governs the beginning of a dial attempt.
// If there are no ongoing dials, it returns true, and the client is now
// scheduled to dial. Every other goroutine that calls startDial -- with
//the same dst -- will block until client is done. The client MUST call
// ds.Unlock(p) when it is done, to unblock the other callers.
// The client is not reponsible for achieving a successful dial, only for
// reporting the end of the attempt (calling ds.Unlock(p)).
//
// see the example below `dialsync`
func
(
ds
*
dialsync
)
Lock
(
dst
peer
.
ID
)
(
bool
,
chan
struct
{})
{
ds
.
lock
.
Lock
()
if
ds
.
ongoing
==
nil
{
// init if not ready
ds
.
ongoing
=
make
(
map
[
peer
.
ID
]
chan
struct
{})
}
wait
,
found
:=
ds
.
ongoing
[
dst
]
if
!
found
{
ds
.
ongoing
[
dst
]
=
make
(
chan
struct
{})
}
ds
.
lock
.
Unlock
()
if
found
{
return
false
,
wait
}
// ok! you're signed up to dial!
return
true
,
nil
}
// Unlock releases waiters to a dial attempt. see Lock.
// if Unlock(p) is called without calling Lock(p) first, Unlock panics.
func
(
ds
*
dialsync
)
Unlock
(
dst
peer
.
ID
)
{
ds
.
lock
.
Lock
()
wait
,
found
:=
ds
.
ongoing
[
dst
]
if
!
found
{
panic
(
"called dialDone with no ongoing dials to peer: "
+
dst
.
Pretty
())
}
delete
(
ds
.
ongoing
,
dst
)
// remove ongoing dial
close
(
wait
)
// release everyone else
ds
.
lock
.
Unlock
()
}
// dialbackoff is a struct used to avoid over-dialing the same, dead peers.
// Whenever we totally time out on a peer (all three attempts), we add them
// to dialbackoff. Then, whenevers goroutines would _wait_ (dialsync), they
...
...
@@ -246,8 +174,7 @@ func (s *Swarm) bestConnectionToPeer(p peer.ID) *Conn {
// gatedDialAttempt is an attempt to dial a node. It is gated by the swarm's
// dial synchronization systems: dialsync and dialbackoff.
func
(
s
*
Swarm
)
gatedDialAttempt
(
ctx
context
.
Context
,
p
peer
.
ID
)
(
*
Conn
,
error
)
{
var
logdial
=
lgbl
.
Dial
(
"swarm"
,
s
.
LocalPeer
(),
p
,
nil
,
nil
)
defer
log
.
EventBegin
(
ctx
,
"swarmDialAttemptSync"
,
logdial
)
.
Done
()
defer
log
.
EventBegin
(
ctx
,
"swarmDialAttemptSync"
,
p
)
.
Done
()
// check if we already have an open connection first
conn
:=
s
.
bestConnectionToPeer
(
p
)
...
...
@@ -255,57 +182,36 @@ func (s *Swarm) gatedDialAttempt(ctx context.Context, p peer.ID) (*Conn, error)
return
conn
,
nil
}
// check if there's an ongoing dial to this peer
if
ok
,
wait
:=
s
.
dsync
.
Lock
(
p
);
ok
{
defer
s
.
dsync
.
Unlock
(
p
)
// if this peer has been backed off, lets get out of here
if
s
.
backf
.
Backoff
(
p
)
{
log
.
Event
(
ctx
,
"swarmDialBackoff"
,
logdial
)
return
nil
,
ErrDialBackoff
}
// ok, we have been charged to dial! let's do it.
// if it succeeds, dial will add the conn to the swarm itself.
defer
log
.
EventBegin
(
ctx
,
"swarmDialAttemptStart"
,
logdial
)
.
Done
()
ctxT
,
cancel
:=
context
.
WithTimeout
(
ctx
,
s
.
dialT
)
conn
,
err
:=
s
.
dial
(
ctxT
,
p
)
cancel
()
log
.
Debugf
(
"dial end %s"
,
conn
)
if
err
!=
nil
{
log
.
Event
(
ctx
,
"swarmDialBackoffAdd"
,
logdial
)
s
.
backf
.
AddBackoff
(
p
)
// let others know to backoff
// ok, we failed. try again. (if loop is done, our error is output)
return
nil
,
fmt
.
Errorf
(
"dial attempt failed: %s"
,
err
)
}
log
.
Event
(
ctx
,
"swarmDialBackoffClear"
,
logdial
)
s
.
backf
.
Clear
(
p
)
// okay, no longer need to backoff
return
conn
,
nil
}
else
{
// we did not dial. we must wait for someone else to dial.
// if this peer has been backed off, lets get out of here
if
s
.
backf
.
Backoff
(
p
)
{
log
.
Event
(
ctx
,
"swarmDialBackoff"
,
p
)
return
nil
,
ErrDialBackoff
}
// check whether we should backoff first...
if
s
.
backf
.
Backoff
(
p
)
{
log
.
Event
(
ctx
,
"swarmDialBackoff"
,
logdial
)
return
nil
,
ErrDialBackoff
}
return
s
.
dsync
.
DialLock
(
ctx
,
p
)
}
defer
log
.
EventBegin
(
ctx
,
"swarmDialWait"
,
logdial
)
.
Done
()
select
{
case
<-
wait
:
// wait for that other dial to finish.
// doDial is an ugly shim method to retain all the logging and backoff logic
// of the old dialsync code
func
(
s
*
Swarm
)
doDial
(
ctx
context
.
Context
,
p
peer
.
ID
)
(
*
Conn
,
error
)
{
var
logdial
=
lgbl
.
Dial
(
"swarm"
,
s
.
LocalPeer
(),
p
,
nil
,
nil
)
// ok, we have been charged to dial! let's do it.
// if it succeeds, dial will add the conn to the swarm itself.
defer
log
.
EventBegin
(
ctx
,
"swarmDialAttemptStart"
,
logdial
)
.
Done
()
ctxT
,
cancel
:=
context
.
WithTimeout
(
ctx
,
s
.
dialT
)
conn
,
err
:=
s
.
dial
(
ctxT
,
p
)
cancel
()
log
.
Debugf
(
"dial end %s"
,
conn
)
if
err
!=
nil
{
log
.
Event
(
ctx
,
"swarmDialBackoffAdd"
,
logdial
)
s
.
backf
.
AddBackoff
(
p
)
// let others know to backoff
// see if it worked, OR we got an incoming dial in the meantime...
conn
:=
s
.
bestConnectionToPeer
(
p
)
if
conn
!=
nil
{
return
conn
,
nil
}
return
nil
,
ErrDialFailed
case
<-
ctx
.
Done
()
:
// or we may have to bail...
return
nil
,
ctx
.
Err
()
}
// ok, we failed. try again. (if loop is done, our error is output)
return
nil
,
fmt
.
Errorf
(
"dial attempt failed: %s"
,
err
)
}
log
.
Event
(
ctx
,
"swarmDialBackoffClear"
,
logdial
)
s
.
backf
.
Clear
(
p
)
// okay, no longer need to backoff
return
conn
,
nil
}
// dial is the actual swarm's dial logic, gated by Dial.
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment
Menu
Projects
Groups
Snippets
Help